Great Learning : AIML Online Capstone -AUTOMATIC TICKET ASSIGNMENT

DecA : Group 4: NLP 1

Group Members :

  1. Priya Moily
  2. Priyanka Gupta
  3. Avinash Balani
  4. Priyank Bhuch
In [ ]:
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive

1. Importing relavant Libraries

In [ ]:
!pip install ftfy
from time import time
from PIL import Image
from zipfile import ZipFile
import os, sys, itertools, re
import tensorflow as tf
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
import plotly as py
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import init_notebook_mode, iplot, plot
from sklearn.preprocessing import QuantileTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,f1_score,recall_score,precision_score, confusion_matrix, classification_report
import sklearn.neighbors._base

import imblearn
from imblearn.over_sampling import SMOTE

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.initializers import Constant
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization, Activation, Conv2D, MaxPooling2D, Reshape, Embedding, LSTM,  TimeDistributed, Bidirectional, Lambda, Input, Add, GlobalMaxPool1D
from tensorflow.keras import regularizers, optimizers
from sklearn.metrics import r2_score
from tensorflow.keras.models import load_model
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
#import cv2
from tensorflow.keras.applications.mobilenet import preprocess_input
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# to define loss
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.backend import log, epsilon

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.datasets import imdb

from itertools import islice

import re
import nltk
from nltk.corpus import stopwords

from ftfy import fix_encoding, fix_text, badness
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

from sklearn.utils import resample

import pickle, string

import cufflinks as cf
cf.go_offline()
cf.set_config_file(offline=False, world_readable=True)

import plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot

import spacy

# Suppress warnings
import warnings; warnings.filterwarnings('ignore')

SEED = 123                 # to be able to rerun the same NN
np.random.seed(SEED)
tf.random.set_seed(SEED)

from IPython.display import display
pd.options.display.max_columns = None
pd.options.display.max_rows = None

! pip install langdetect
from langdetect import detect
from langdetect import detect
!pip install goslate
from goslate import Goslate
!pip install spacy
from collections import defaultdict
from bs4 import BeautifulSoup
import gensim
import gensim.corpora as corpora
#Remove stemming(snowball stemming) add lemmatistaion using simple_process from gensim 
from gensim.utils import simple_preprocess
from gensim.models.ldamodel import LdaModel
from gensim.models import CoherenceModel

# spacy for lemmatization
import spacy

from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, TfidfTransformer
Collecting ftfy
  Downloading ftfy-6.0.3.tar.gz (64 kB)
     |████████████████████████████████| 64 kB 1.6 MB/s 
Requirement already satisfied: wcwidth in /usr/local/lib/python3.7/dist-packages (from ftfy) (0.2.5)
Building wheels for collected packages: ftfy
  Building wheel for ftfy (setup.py) ... done
  Created wheel for ftfy: filename=ftfy-6.0.3-py3-none-any.whl size=41933 sha256=db9050ed60cc21c4f59ab192f5ade62a9ab383e3048133f211831c6be9c6c7d7
  Stored in directory: /root/.cache/pip/wheels/19/f5/38/273eb3b5e76dfd850619312f693716ac4518b498f5ffb6f56d
Successfully built ftfy
Installing collected packages: ftfy
Successfully installed ftfy-6.0.3
Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
     |████████████████████████████████| 981 kB 2.0 MB/s 
Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from langdetect) (1.15.0)
Building wheels for collected packages: langdetect
  Building wheel for langdetect (setup.py) ... done
  Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993242 sha256=6161767995da29d76bd7de8a16a4f0d16ae4062db926a8126909284a6106e2f5
  Stored in directory: /root/.cache/pip/wheels/c5/96/8a/f90c59ed25d75e50a8c10a1b1c2d4c402e4dacfa87f3aff36a
Successfully built langdetect
Installing collected packages: langdetect
Successfully installed langdetect-1.0.9
Collecting goslate
  Downloading goslate-1.5.2.tar.gz (16 kB)
Collecting futures
  Downloading futures-3.0.5.tar.gz (25 kB)
WARNING: Discarding https://files.pythonhosted.org/packages/55/db/97c1ca37edab586a1ae03d6892b6633d8eaa23b23ac40c7e5bbc55423c78/futures-3.0.5.tar.gz#sha256=0542525145d5afc984c88f914a0c85c77527f65946617edb5274f72406f981df (from https://pypi.org/simple/futures/). Command errored out with exit status 1: python setup.py egg_info Check the logs for full command output.
  Downloading futures-3.0.4.tar.gz (25 kB)
WARNING: Discarding https://files.pythonhosted.org/packages/8d/73/b5fff618482bc06c9711e7cdc0d5d7eb1904d35898f48f2d7f9696b08bef/futures-3.0.4.tar.gz#sha256=19485d83f7bd2151c0aeaf88fbba3ee50dadfb222ffc3b66a344ef4952b782a3 (from https://pypi.org/simple/futures/). Command errored out with exit status 1: python setup.py egg_info Check the logs for full command output.
  Downloading futures-3.0.3.tar.gz (24 kB)
WARNING: Discarding https://files.pythonhosted.org/packages/4c/dc/f9473006d4c9c52d4a4e977173fbcbfb1a8ef3a57e32e885edf994fd4a45/futures-3.0.3.tar.gz#sha256=2fe2342bb4fe8b8e217f0d21b5921cbe5408bf966d9f92025e707e881b198bed (from https://pypi.org/simple/futures/). Command errored out with exit status 1: python setup.py egg_info Check the logs for full command output.
  Downloading futures-3.0.2.tar.gz (24 kB)
WARNING: Discarding https://files.pythonhosted.org/packages/f8/e7/fc0fcbeb9193ba2d4de00b065e7fd5aecd0679e93ce95a07322b2b1434f4/futures-3.0.2.tar.gz#sha256=dc3fc91508e49e0fd2f8625f0132d16e49c80f882e7e1d565c56b0d5dfbae257 (from https://pypi.org/simple/futures/). Command errored out with exit status 1: python setup.py egg_info Check the logs for full command output.
  Downloading futures-3.0.1.tar.gz (24 kB)
WARNING: Discarding https://files.pythonhosted.org/packages/b2/2c/6b6a57379e47031c6f52e625e0e2b8f6702a8d1f61b6e0daee391e82c187/futures-3.0.1.tar.gz#sha256=f78f2ef458639d72a625cf9c7643cf5442bb222ac11c12bcc445c6ad1cd862e2 (from https://pypi.org/simple/futures/). Command errored out with exit status 1: python setup.py egg_info Check the logs for full command output.
  Downloading futures-3.0.0.tar.gz (24 kB)
WARNING: Discarding https://files.pythonhosted.org/packages/ea/c9/35287369718fc05059e7a9d0d73c53745fe981010b4185b3858e7d46eff1/futures-3.0.0.tar.gz#sha256=d9cd7bb09aa01f0e4940af64c31fbd7045098b7b4354420d7838ea39e8b86ee3 (from https://pypi.org/simple/futures/). Command errored out with exit status 1: python setup.py egg_info Check the logs for full command output.
  Downloading futures-2.2.0-py2.py3-none-any.whl (16 kB)
Building wheels for collected packages: goslate
  Building wheel for goslate (setup.py) ... done
  Created wheel for goslate: filename=goslate-1.5.2-py3-none-any.whl size=11436 sha256=d0674ba84520a5b858ba1458542cd82376d278efde09276211c762f2b2f1dab6
  Stored in directory: /root/.cache/pip/wheels/a8/8a/c4/85425eac5e0746fd5fc898801858331e55ac386f476d65e58d
Successfully built goslate
Installing collected packages: futures, goslate
Successfully installed futures-2.2.0 goslate-1.5.2
Requirement already satisfied: spacy in /usr/local/lib/python3.7/dist-packages (2.2.4)
Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (1.19.5)
Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy) (3.0.6)
Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from spacy) (57.4.0)
Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (2.23.0)
Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.7/dist-packages (from spacy) (1.0.0)
Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.7/dist-packages (from spacy) (1.1.3)
Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy) (2.0.6)
Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (4.62.3)
Requirement already satisfied: thinc==7.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (7.4.0)
Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (0.8.2)
Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (1.0.6)
Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy) (1.0.5)
Requirement already satisfied: blis<0.5.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy) (0.4.1)
Requirement already satisfied: importlib-metadata>=0.20 in /usr/local/lib/python3.7/dist-packages (from catalogue<1.1.0,>=0.0.7->spacy) (4.8.2)
Requirement already satisfied: typing-extensions>=3.6.4 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy) (3.10.0.2)
Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy) (3.6.0)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (3.0.4)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (2.10)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (2021.10.8)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (1.24.3)

2. Importing the Original Data and checking its shape

In [ ]:
dataset = pd.read_excel('/content/sample_data/input_data.xlsx')
dataset.shape
Out[ ]:
(8500, 4)

2.1 Checking for Null values and gettin the sum of all Nulls, column wise.

In [ ]:
dataset.isnull().sum()
Out[ ]:
Short description    8
Description          1
Caller               0
Assignment group     0
dtype: int64

2.2 Displaying the rows with Null Values

In [ ]:
dataset[pd.isnull(dataset).any(axis=1)]
Out[ ]:
Short description Description Caller Assignment group
2604 NaN \r\n\r\nreceived from: ohdrnswl.rezuibdt@gmail... ohdrnswl rezuibdt GRP_34
3383 NaN \r\n-connected to the user system using teamvi... qftpazns fxpnytmk GRP_0
3906 NaN -user unable tologin to vpn.\r\n-connected to... awpcmsey ctdiuqwe GRP_0
3910 NaN -user unable tologin to vpn.\r\n-connected to... rhwsmefo tvphyura GRP_0
3915 NaN -user unable tologin to vpn.\r\n-connected to... hxripljo efzounig GRP_0
3921 NaN -user unable tologin to vpn.\r\n-connected to... cziadygo veiosxby GRP_0
3924 NaN name:wvqgbdhm fwchqjor\nlanguage:\nbrowser:mic... wvqgbdhm fwchqjor GRP_0
4341 NaN \r\n\r\nreceived from: eqmuniov.ehxkcbgj@gmail... eqmuniov ehxkcbgj GRP_0
4395 i am locked out of skype NaN viyglzfo ajtfzpkb GRP_0

2.3 Handling Null values and replacing them with string and then checking count of Null to verify.

In [ ]:
dataset.fillna(str(), inplace=True)
dataset.isnull().sum()
Out[ ]:
Short description    0
Description          0
Caller               0
Assignment group     0
dtype: int64

3. Checking for Duplicate records in the dataset

In [ ]:
duplicate = dataset[dataset.duplicated()]

3.1 There are 83 records which are duplicate

In [ ]:
duplicate.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 83 entries, 51 to 8405
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Short description  83 non-null     object
 1   Description        83 non-null     object
 2   Caller             83 non-null     object
 3   Assignment group   83 non-null     object
dtypes: object(4)
memory usage: 3.2+ KB

3.2 Removing the duplicate records

In [ ]:
dataset1 = dataset[~dataset.duplicated()]

3.3 Checking for the dataset without duplicates. There are 8417 unique records

In [ ]:
dataset1.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 8417 entries, 0 to 8499
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Short description  8417 non-null   object
 1   Description        8417 non-null   object
 2   Caller             8417 non-null   object
 3   Assignment group   8417 non-null   object
dtypes: object(4)
memory usage: 328.8+ KB
In [ ]:
dataset1.head(20)
Out[ ]:
Short description Description Caller Assignment group
0 login issue -verified user details.(employee# & manager na... spxjnwir pjlcoqds GRP_0
1 outlook \r\n\r\nreceived from: hmjdrvpb.komuaywn@gmail... hmjdrvpb komuaywn GRP_0
2 cant log in to vpn \r\n\r\nreceived from: eylqgodm.ybqkwiam@gmail... eylqgodm ybqkwiam GRP_0
3 unable to access hr_tool page unable to access hr_tool page xbkucsvz gcpydteq GRP_0
4 skype error skype error owlgqjme qhcozdfx GRP_0
5 unable to log in to engineering tool and skype unable to log in to engineering tool and skype eflahbxn ltdgrvkz GRP_0
6 event: critical:HostName_221.company.com the v... event: critical:HostName_221.company.com the v... jyoqwxhz clhxsoqy GRP_1
7 ticket_no1550391- employment status - new non-... ticket_no1550391- employment status - new non-... eqzibjhw ymebpoih GRP_0
8 unable to disable add ins on outlook unable to disable add ins on outlook mdbegvct dbvichlg GRP_0
9 ticket update on inplant_874773 ticket update on inplant_874773 fumkcsji sarmtlhy GRP_0
10 engineering tool says not connected and unable... engineering tool says not connected and unable... badgknqs xwelumfz GRP_0
11 hr_tool site not loading page correctly hr_tool site not loading page correctly dcqsolkx kmsijcuz GRP_0
12 unable to login to hr_tool to sgxqsuojr xwbeso... unable to login to hr_tool to sgxqsuojr xwbeso... oblekmrw qltgvspb GRP_0
13 user wants to reset the password user wants to reset the password iftldbmu fujslwby GRP_0
14 unable to open payslips unable to open payslips epwyvjsz najukwho GRP_0
15 ticket update on inplant_874743 ticket update on inplant_874743 fumkcsji sarmtlhy GRP_0
16 unable to login to company vpn \n\nreceived from: xyz@company.com\n\nhi,\n\ni... chobktqj qdamxfuc GRP_0
17 when undocking pc , screen will not come back when undocking pc , screen will not come back sigfdwcj reofwzlm GRP_3
18 erp SID_34 account locked erp SID_34 account locked nqdyowsm yqerwtna GRP_0
19 unable to sign into vpn unable to sign into vpn ftsqkvre bqzrupic GRP_0

4. As part of Data Analysis, below plot showcases the distribution of Assignment Group class

In [ ]:
plt.figure(figsize=(20,15))
dataset1['Assignment group'].value_counts().plot(kind='bar')
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f15a3c0bad0>

4.1 It is evident that GRP_0 has got the maximum count in the data set. Plotting another graph for Percentage distribution of Assignment Group Class

In [ ]:
plt.figure(figsize=(20,15))
dataset1['Assignment group'].value_counts(normalize=True).plot(kind='bar')
Out[ ]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f159990b710>
In [ ]:
display(dataset1['Assignment group'].value_counts(normalize=True))
GRP_0     0.467387
GRP_8     0.076631
GRP_24    0.033860
GRP_12    0.030533
GRP_9     0.029939
GRP_2     0.028633
GRP_19    0.025544
GRP_3     0.023761
GRP_6     0.021742
GRP_13    0.017227
GRP_10    0.016633
GRP_5     0.015207
GRP_14    0.014019
GRP_25    0.013782
GRP_33    0.012712
GRP_4     0.011881
GRP_29    0.011524
GRP_18    0.010455
GRP_16    0.010099
GRP_31    0.008198
GRP_7     0.008079
GRP_17    0.008079
GRP_34    0.007366
GRP_26    0.006653
GRP_40    0.005346
GRP_28    0.005228
GRP_41    0.004752
GRP_30    0.004633
GRP_15    0.004515
GRP_42    0.004396
GRP_20    0.004277
GRP_45    0.004158
GRP_1     0.003683
GRP_22    0.003683
GRP_11    0.003564
GRP_21    0.003327
GRP_47    0.003208
GRP_23    0.002970
GRP_48    0.002970
GRP_62    0.002970
GRP_39    0.002257
GRP_27    0.002139
GRP_37    0.001901
GRP_60    0.001901
GRP_44    0.001782
GRP_36    0.001782
GRP_50    0.001663
GRP_65    0.001307
GRP_53    0.001307
GRP_52    0.001069
GRP_51    0.000950
GRP_55    0.000950
GRP_49    0.000713
GRP_59    0.000713
GRP_46    0.000713
GRP_43    0.000594
GRP_32    0.000475
GRP_66    0.000475
GRP_58    0.000356
GRP_38    0.000356
GRP_56    0.000356
GRP_63    0.000356
GRP_68    0.000356
GRP_57    0.000238
GRP_54    0.000238
GRP_72    0.000238
GRP_69    0.000238
GRP_71    0.000238
GRP_70    0.000119
GRP_67    0.000119
GRP_61    0.000119
GRP_73    0.000119
GRP_64    0.000119
GRP_35    0.000119
Name: Assignment group, dtype: float64

5. Exploratory Data Analysis

5.1 Top 5 Callers in each Assignment Group

In [ ]:
top_n = 5
df_sample2 = dataset1['Caller'].groupby(dataset1['Assignment group']).value_counts()
caller_grp = pd.DataFrame(df_sample2.groupby(level=0).nlargest(top_n).reset_index(level=0, drop=True))
caller_grp.head(15)
Out[ ]:
Caller
Assignment group Caller
GRP_0 fumkcsji sarmtlhy 126
rbozivdq gmlhrtvp 71
olckhmvx pcqobjnd 47
efbwiadp dicafxhv 45
mfeyouli ndobtzpw 13
GRP_1 bpctwhsn kzqsbmtp 6
jloygrwh acvztedi 4
jyoqwxhz clhxsoqy 3
spxqmiry zpwgoqju 3
kbnfxpsy gehxzayq 2
GRP_10 bpctwhsn kzqsbmtp 60
ihfkwzjd erbxoyqk 6
dizquolf hlykecxa 5
gnasmtvx cwxtsvkm 3
hlrmufzx qcdzierm 3

5.2 Visualizing Top 5 Callers in each of top 10 Assignment Groups

In [ ]:
top_n = 10
df_sample3 = pd.DataFrame(dataset1.groupby('Assignment group').size(),columns = ['Count']).reset_index()
top_grps = df_sample3.nlargest(top_n, 'Count')['Assignment group'].tolist()

fig_cols = 5
fig_rows = int(np.ceil(top_n/fig_cols))
fig, axes = plt.subplots(fig_rows, fig_cols, figsize=(13,9.5))
fig.suptitle('Top 5 callers in each of top 10 assignment groups- Pie Chart (Fig-8)', y=1, va= 'bottom', size='20')
for row in range(fig_rows):
    for col in range(fig_cols):
        grp_n = fig_cols * row + col
        if grp_n < top_n:
            xs = caller_grp.xs(top_grps[grp_n])
            _ = axes[row,col].pie(xs, autopct='%1.1f%%', explode=[0.05]*5)
            axes[row,col].legend(labels=xs.index,loc="best")
            axes[row,col].axis('equal')
            axes[row,col].set_title(top_grps[grp_n])

plt.tight_layout()

6. Data Preprocessing

6.1 Detecting and Removing Mojibakes

While exploring the original data, we found there were a lot of irrelavent and non-meaning full text which got generated due to decoding of records using unintended character encoding. To handle and replace such text libraries pertaing to Mojibakes were imported and used.

Below function is created to apply to the dataset to detect Mojibakes

In [ ]:
def is_mojibake_impacted(text):
    if not badness.sequence_weirdness(text):
        # nothing weird, should be okay
        return True
    try:
        text.encode('sloppy-windows-1252')
    except UnicodeEncodeError:
        # Not CP-1252 encodable, probably fine
        return True
    else:
        # Encodable as CP-1252, Mojibake alert level high
        return False

6.1.1 Checking for those records which got impacted due to Mojibakes

In [ ]:
dataset1[~dataset1.iloc[:,:].applymap(is_mojibake_impacted).all(1)]
Out[ ]:
Short description Description Caller Assignment group
99 password expiry tomorrow \n\nreceived from: ecprjbod.litmjwsy@gmail.com... ecprjbod litmjwsy GRP_0
116 server issues \r\n\r\nreceived from: bgqpotek.cuxakvml@gmail... bgqpotek cuxakvml GRP_0
124 mobile device activation from: tvcdfqgp nrbcqwgj \nsent: friday, octobe... tvcdfqgp nrbcqwgj GRP_0
162 access to bex \r\n\r\nreceived from: yfqoaepn.xnezhsit@gmail... yfqoaepn xnezhsit GRP_0
164 撤回: ticket_no1564867 -- comments added \n\nreceived from: abcdri@company.com\n\nwindy... tycludks cjofwigv GRP_0
170 [urgent!!] delivery note creation request!! \n\nreceived from: fbvpcytz.nokypgvx@gmail.com... fbvpcytz nokypgvx GRP_18
177 unable to access password_management_tool id p... i try to change now my password acc. to attach... ijeqpkrz nwtehsyx GRP_2
186 青岛兴合机电shipment notification邮箱è... from: \nsent: friday, october 28, 2016 7:20 a... yafxlpwi lhxvatkb GRP_18
222 support für fa.gstry \arexjftu ohxdwngl support für fa.konnica \arexjftu ohxdwngl arexjftu ohxdwngl GRP_24
223 probleme mit bluescreen . hallo ,\n\nes ist erneut passiert. der pc hat ... vrfpyjwi nzhvgqiw GRP_24
238 erp pi and msd crm connectivity issue- serirtc... hi all\n\nwe have a connectivity issue between... kgytujhe bonhwzrx GRP_14
239 printer problem / issue information please complete all required questions below. ... dzjxrkae grqczsmx GRP_3
251 reset the password for fygrwuna gomcekzi on e-... bitte passwort für fygrwuna gomcekzi e-mail z... fygrwuna gomcekzi GRP_0
265 netweaver funktioniert nicht mehr \r\n\r\nreceived from: fcyuqvoj.ajqeidlm@gmail... fcyuqvoj ajqeidlm GRP_0
266 a kündigung for fgxprnub hlanwgqj, 11161827, ... a kündigung for fgxprnub hlanwgqj, 11161827, ... ucawbivs ountxzir GRP_2
270 neues passwort für accountname tgryhu hgygrtui neues passwort für accountname tgryhu hgygrtu... nemzycxb xpsgkahw GRP_0
276 outlook收到箱中folder变为每天一个fol... outlook收到箱中folder变为每天一个fol... bxfdkiol mdqlszvc GRP_30
281 unable to down load ethics module from: brdhdd dhwduw\nsent: thursday, october 2... dqwhpjxy pozjxbey GRP_0
282 转发: company email to private phone-hprdlb... \r\n\r\nreceived from: hprdlbxf.nozjtgwi@gmail... hprdlbxf nozjtgwi GRP_0
306 support für we111\zlqfptjx xnklbfua support für we111\zlqfptjx xnklbfua zlqfptjx xnklbfua GRP_24
357 EU_tool aktualisierung alle 15 min läuft nich... EU_tool aktualisierung alle 15 min läuft nicht kfirsmxn dieluyra GRP_25
359 problems with nikulatrhdy \n\nreceived from: koahsriq.wdugqatr@gmail.com... koahsriq wdugqatr GRP_0
363 kalendereinträge - qdxyifhj zbwtunpy hallo, bitte einmal ansehen. danke. qmwhlnev ixtmkwdc GRP_33
375 engineering record issue 1.) eng records for engineering tool can't use... rcpghuqb bxrqamng GRP_25
403 node lhqsv4567465 located at usa is down since... node lhqsv4567465 located at usa is down since... spxqmiry zpwgoqju GRP_8
410 access to retired employee's collaboration_pla... Myhrt sthry retired as plant manager in usa. i... rozsyfai zncajubh GRP_16
446 setup rechner ewel8323865 für hr.thrydad (thr... setup rechner ewel8323865 für hr.thrydad (thr... puxsvfwr cwkjruni GRP_24
450 bitte konto ewel8323865 reaktivieren \laptop f... bitte konto ewel8323865 reaktivieren \laptop f... ughzilfm cfibdamq GRP_0
454 support für fa.thrydsss-funke \laeusvjo fvaihgpx support für fa.thrydsss-funke \laeusvjo fvaihgpx laeusvjo fvaihgpx GRP_24
461 hpqc delivers error message: "user is not main... my user id: thrydksd'\ni´m involved in uat ua... iavozegx jpcudyfi GRP_38
463 telefon gigaset m2 ex professional (tel.:) lä... telefon gigaset m2 ex professional (tel.:) lä... txkgmwbc qohmgwrp GRP_33
467 timerecording terminals in plant germany no co... timerecording terminals in plant germany no co... smpijawb eawkpgqf GRP_33
469 ie浏览器打开crm系统后提示用户已è¢... ie浏览器打开crm系统后提示用户已è¢... cjnlsbkq ocxnrewb GRP_31
476 server lnbdm839 (active directory) located in... server lnbdm839 (active directory) located in... bozdftwx smylqejw GRP_39
481 zeitdaten für germany steel fehlen seit gestern ca. 15.00 uhr fehlen alle zeitdat... yjofqlrx aqvxfhmn GRP_33
487 msd - office 2013 outlook 打不开,显示æ—... please provide details of the issue.\n[‎2016... melhduty gqchtedl GRP_31
488 看不见a3 的文件夹(\\HostName_17\teams... 看不见a3 的文件夹(\\HostName_17\teams... hlrmufzx qcdzierm GRP_12
509 request to reset microsoft online services pas... from: microsoft on behalf of company inc. [mai... szrglyte stvyhzxn GRP_0
514 unable to login to outlook from: rjsulvat uanigkqc \nsent: monday, octobe... rjsulvat uanigkqc GRP_0
524 a kündigung for eluvxqhw gpbfkqeu, 11166428, ... hello , \r\n\r\na kündigung for eluvxqhw gpbf... trgqbeax hfyzudql GRP_2
537 us time change \n\nreceived from: xabkyoug.wdkyiqfx@gmail.com... xabkyoug wdkyiqfx GRP_0
549 i can´t connect my note book to the " vpn " i can´t connect my note book to the vpn " "\... hycqbvem oriablwt GRP_0
561 need to create the delivery note for sto 50192... need to create the delivery note for sto 50192... gokluswt qlvzreyb GRP_6
565 new iphone activation / die synchronisierung ... \n\nreceived from: ecwtrjnq.jpecxuty@gmail.com... ecwtrjnq jpecxuty GRP_0
570 druckerfunktionsstörung \r\n\r\nreceived from: fdqjsygx.aivdjqtr@gmail... fdqjsygx aivdjqtr GRP_42
574 pc der maschine r45 defekt pc der maschine r45 fährt nicht mehr hoch. baygwijr lavxwkfc GRP_33
578 EU_tool in germany steel ohne funktion - rück... EU_tool in germany steel ohne funktion - rück... rvdtagmf klbnhydo GRP_25
586 probleme mit bluescreen hallo ,\n\ngerade eben ist der computer an mei... vrfpyjwi nzhvgqiw GRP_24
589 probleme mit lan für rechner erodiermaschine\... probleme mit lan für rechner erodiermaschine\... dtlmbcrx mwuateyx GRP_24
605 material type "nd" doesn ´t create any requr... \r\n\r\nreceived from: qbewrpfu.lwibmxzo@gmail... qbewrpfu lwibmxzo GRP_13
608 etiketten drucker im bereich endkontrolle germ... funktionsstörung tzmewbdv zjbuwmkn GRP_33
618 outlook打不开 打开outlook时,显示outlook.ost文件错è... qayozbcd vwglmpjq GRP_0
628 office excel ,powerpoint 打开一些文件时... office excel ,powerpoint 打开一些文件时... hdungfsc znuhyjkx GRP_31
635 inquiry on impact awards from: wauhocsk vxuikqaf \nsent: sunday, octobe... wauhocsk vxuikqaf GRP_0
643 interface: gigabitethernet0/2 · usa-2950-acce... interface: gigabitethernet0/2 · usa-2950-acce... spxqmiry zpwgoqju GRP_4
709 business partner id - bertsckaadyd122 \n\nreceived from: inxsupmy.zhwmifvx@gmail.com... inxsupmy zhwmifvx GRP_2
722 email address in purchasing from: dpuifqeo eglwsfkn \nsent: friday, octobe... dfetvmzq brxavtzp GRP_2
729 interface: gigabitethernet1/0/40 ·shopfloor_1... gigabitethernet1/0/40 · shopfloor_111_schuett... jyoqwxhz clhxsoqy GRP_8
744 drucker in löwe uacyltoe hxgayczeraum knicrht... drucker in löwe uacyltoe hxgayczeraum knicrht... dtlmbcrx mwuateyx GRP_24
745 benötige zugriff auf schichtplaner bitte zugriff auf folgenden pfad einrichten: d... svelutaj nguzrmec GRP_34
750 expedite mm3516492 mm2405383 hi,team\npls. help to run out dn against sto... wktesmbp lorjymef GRP_6
751 we111 gibt nur eine fehlermeldung aus we111 gibt nur eine fehlermeldung aus :\r\nanm... vzqomdgt jwoqbuml GRP_24
757 rechner für längenmessmaschine uacyltoe hxga... rechner für längenmessmaschine uacyltoe hxga... xosdfhbu gtbfkisl GRP_24
758 pc name \n\nreceived from: koahsriq.wdugqatr@gmail.com... koahsriq wdugqatr GRP_28
760 pw reset for erp - user name: pihddltzr123 \n\nreceived from: iboltufk.ezfnvcqp@gmail.com... iboltufk ezfnvcqp GRP_0
764 please release access to \\HostName_705\teams\... \r\n\r\nreceived from: byclpwmv.esafrtbh@gmail... byclpwmv esafrtbh GRP_33
765 pls release access to \\HostName_705\lean\ \r\n\r\nreceived from: byclpwmv.esafrtbh@gmail... byclpwmv esafrtbh GRP_0
769 backup für rechner lasplant_119\pfjwinbg ljt... backup für rechner lasplant_119\pfjwinbg ljt... pfjwinbg ljtzbdqg GRP_24
774 *** urgent *** please reactive user id = dudyh... \n\nreceived from: hupnceij.hyozjakb@gmail.com... hupnceij hyozjakb GRP_33
777 pw reset for erp - user name piltzrnj567 th... \r\n\r\nreceived from: iboltufk.ezfnvcqp@gmail... iboltufk ezfnvcqp GRP_0
779 collaboration_platform nicht verfügbar - kein... collaboration_platform nicht verfügbar - kein... vdylwkbo hzlnrgat GRP_0
784 reinecker wzs 60 r 241 abteilung kentip pc fährt nach neustart nicht mehr hoch bzw. b... jfteqgyc ncazxobk GRP_33
791 erp密码忘记 id: fenthgh45 erp password wa... \n\nreceived from: jmxqhrfa.vawptbfl@gmail.com... jmxqhrfa vawptbfl GRP_0
819 collaboration_platform - industrial \r\n\r\nreceived from: okmhzgcq.wzvetbqa@gmail... okmhzgcq wzvetbqa GRP_16
824 my outlook doesn´t work. in last fourth weeks i have same problem. i st... dismypxe zkwcmgsr GRP_0
833 interface 'serial0/0/0 and serial0/1/1:0 · co... interface 'serial0/0/0 and serial0/1/1:0 · co... rkupnshb gsmzfojw GRP_8
839 access to http:://bddjwwwdw/ for axcbfuqo yiag... axcbfuqo yiagubvh no longer has access to http... axcbfuqo yiagubvh GRP_25
845 urgent - please create tax code a0 in cc54yks09. please create tax code a0 in cc54yks09.this is... kxcawjet xmybdwfh GRP_10
863 EU_tool, pdv, batch management does not work working with the systems is almost impossible.... hwxqoijt cotsgwrj GRP_25
864 xvwchsdg pladjmxt - employee termination pn gehe einmal davon aus das herr aurwddwacher zu... upiyobvj lwohuizr GRP_33
879 barcode über word etikettendruck ich will im etikettendruck eine zahlen buchsta... cwrikael oanmsecr GRP_42
887 support für roboworker \sandstrahlen \xwirzvd... support für roboworker \sandstrahlen \xwirzvd... xwirzvda okhyipgr GRP_24
893 telefon im "meeting room 3" telefonnummer 263 ... wenn jemand von außerhalb anruft dann klingel... fhzeoyws gudfnirz GRP_33
898 erp printing - !production order! printer mp74... production order printer mp7456 is being repla... xvwchsdg pladjmxt GRP_5
904 probleme mit barcode etiketten \volume format ... obleme mit barcode etiketten \volume format zu... vzqomdgt jwoqbuml GRP_24
906 der monitor an unser längenmessmaschine ist d... guten morgen hkydrfdw,\n\nder monitor an unser... ctbsupdy auhocbli GRP_24
907 fregabe für ordner applications wurde gekappt??? bitte wieder freigeben lzpuyrvw zkxbacvn GRP_0
909 restore \n\nreceived from: trgqbeax.hfyzudql@gmail.com... trgqbeax hfyzudql GRP_12
913 schreibrechte für ksdvp3, ce_leiter schreibrechte für ksdvp3, ce_leiter nemzycxb xpsgkahw GRP_34
932 mm # 2639483 iak-urgent from: kwddwdw hudfefwe \nsent: thursday, octob... ovhtgsxd dcqhnrmy GRP_18
952 collaboration_platform system not working the collaboration_platform system is not worki... wgqkKupL esgahtqn GRP_16
967 it_ passwort \n\nreceived from: soldfnbq.uhnbsvqd@gmail.com... soldfnbq uhnbsvqd GRP_0
981 employment status - new non-employee ycgkinov ... *page down to ensure that all required data fi... lfikjasz tjbqcmvl GRP_2
982 business_client无法登入 \n\nreceived from: ktghvuwr.uwtakcmj@gmail.com... ktghvuwr uwtakcmj GRP_0
993 bitte scanner für we111 einstellen.drucker wu... bitte scanner für we111 einstellen.drucker wu... vzqomdgt jwoqbuml GRP_24
1016 impact award \n\nreceived from: lzvdyouh.imqgfadb@gmail.com... lzvdyouh imqgfadb GRP_0
1018 folder access : s:\globalace_holemaking\ha646 please see below in red:\r\n\r\nviele grüße ... cltszugw tgzbklec GRP_34
1028 waste email from workflow system \r\n\r\nreceived from: creojvdh.ciblyskg@gmail... creojvdh ciblyskg GRP_29
1034 lüfter defekt \industriekontrollmonitor \nipt... lüfter defekt \industriekontrollmonitor \nipt... niptbwdq csenjruz GRP_24
1036 probleme mit kamera und monitor . guten morgen dhthykts,\n\num die kamera an den... vrfpyjwi nzhvgqiw GRP_24
1038 probleme mit fp \rechner bleibt hängen \ksxc... probleme mit fp \rechner bleibt hängen \ksxc... ksxchbaf rhquvzfm GRP_24
1048 verbindung zum internet server nicht möglich ... fehlermeldung in der mail,: dieser vorgang wur... usdhpatm icgmphjk GRP_0
1053 reset passwords for fygrwuna gomcekzi using pa... bitte erp kennwort zurücksetzen. fygrwuna gomcekzi GRP_17
1059 netzteil oder netzstecker defekt - pc warenein... bitte netzteil oder netzstecker am pc evhw8114... ipeajlvk idmbyztf GRP_33
1081 用友软件问题 打开已关闭的销售订单时,显示"不... bwstnmjh yqumwrsk GRP_48
1094 envoyé à partir de l’outil capture d’écran \r\n\r\nreceived from: orvsydzf.rbqtpdaz@gmail... orvsydzf rbqtpdaz GRP_0
1109 skype \r\n\r\nreceived from: eqxyvfpi.gbaljypo@gmail... eqxyvfpi gbaljypo GRP_0
1134 odbc-fehler - systemfehler 126: das angegebene... ich möchte einen serienbrief erstellen, kann ... upiyobvj lwohuizr GRP_33
1143 please redirect to local it: word document 134... das folgende word-dokument kann nicht mehr bea... cwtrgeup nxctkmzo GRP_49
1146 fc for eweausbildung for jannek hündling hallo,\n\nbitte für jannek hündling zugriff ... fdyietau dvsyxwbu GRP_12
1156 monitor für video messmaschine liefern \ozifl... monitor für video messmaschine liefern \ozifl... vrfpyjwi nzhvgqiw GRP_24
1161 res: inc1559160-problem with engineering_tool ... \n\nreceived from: qasdhyzm.yuglsrwx@gmail.com... qasdhyzm yuglsrwx GRP_25
1167 multiple app problems, user jesjnlyenmrest34 \n\nreceived from: vogtfyne.isugmpcn@gmail.com... gkzedilm tkpfumeb GRP_25
1169 fc for HostName_717\sk-leitung for user mokolt... hallo ,\n\nbitte lese- u. schreibberechtigung ... lhmxposv lnpgjhus GRP_12
1175 bitte das iphone-6 001 freischalten für mail-... \n\nreceived from: rtnyumbg.yzemkhbq@gmail.com... rtnyumbg yzemkhbq GRP_0
1178 电话机没有声音 电话机没有声音 cyjlqdwm kywiuosn GRP_30
1186 erp did not pull the customer part number from... hello it-team,\r\n\r\nplease help to check the... bswlorek yhdrlgbs GRP_13
1201 plant_269 xerox workcentre 5335 - rr18 printer \r\n\r\nreceived from: dpuifqeo.eglwsfkn@gmail... dpuifqeo eglwsfkn GRP_3
1238 push-out reports \r\n\r\nreceived from: ctzykflo.evzbhgru@gmail... ctzykflo evzbhgru GRP_9
1242 weekly sales activity reports \n\nreceived from: mnlvhtug.imvetgoa@gmail.com... mnlvhtug imvetgoa GRP_0
1244 rechner für EU_tool stäbe hängt sich auf .\... rechner für EU_tool stäbe hängt sich auf .\... spdczoth vajtodny GRP_24
1247 rechner für infostand defekt \probleme mit lÃ... rechner für infostand defekt \probleme mit lÃ... niptbwdq csenjruz GRP_24
1248 wir brauchen usb-stick (16gb) für 1.lehrjahr ... hallo ,\n\nwir brauchen usb-stick (16gb) für ... dknejifu dljvtebc GRP_24
1271 fe10 复印出来纸张文字不清晰 fe10 复印出来纸张文字不清晰 omatlyrd bvraipdt GRP_30
1272 probleme mit benutzer erneut nur temporär! \m... probleme mit benutzer erneut nur temporär! \m... xwirzvda okhyipgr GRP_24
1277 bob j hello,\r\n\r\nbob-j geht wieder mal nicht.\r\n... gmneclxj czqthmrs GRP_0
1279 更換密碼後, company connect可登入,但ç... 更換密碼後, company connect可登入,但ç... zhpwcdea cboefuis GRP_0
1282 r126 pc def. evtl. netzgerät r126 pc def. evtl. netzgerät fasirxzo xlvnhptm GRP_33
1283 keine datenübertragung erp datenbank zu tinte... bitte sofort lokalen it support smpijawb eawkp... cwrikael oanmsecr GRP_42
1288 EU_tool ausfall in germany keine rückmeldungen und zuteillisten möglich vumbyikw kqsiougd GRP_25
1306 转发: recall plant_247-101516-02 ref 650007... \n\nreceived from: wktesmbp.lorjymef@gmail.com... wktesmbp lorjymef GRP_18
1344 pc an r241 in halle c nähe büro nesner fähr... einlasten bei it-germany, da hierfür bereits ... dtrvxiuq bwuqdtfo GRP_33
1352 server lnbdm839 (active directory) located in... server lnbdm839 (active directory) located in... dkmcfreg anwmfvlg GRP_8
1358 pass word reset \n\nreceived from: ujzhflpn.oibnxrvq@gmail.com... ujzhflpn oibnxrvq GRP_0
1361 vogelfontein, south africa , sa:company-eu-zaf... vogelfontein, south africa , sa:company-eu-zaf... oldrctiu bxurpsyi GRP_8
1411 a kündigung for dxnzkcuh eqdgoxap, 11161751, ... hello , \r\n\r\na kündigung for dxnzkcuh eqdg... ucawbivs ountxzir GRP_2
1443 reporting_tool dashbankrd not appearing in my ... \n\nreceived from: xfdkwusj.gyklresa@gmail.com... xfdkwusj gyklresa GRP_22
1445 account zugriff dyqekzuc djhznybt wiederherst... \r\n\r\nreceived from: urpbyoke.vwcbhmds@gmail... urpbyoke vwcbhmds GRP_0
1451 probleme mit der anzeige von offenen emails \n\nreceived from: ecoljnvt.lbdqmvfs@gmail.com... ecoljnvt lbdqmvfs GRP_0
1452 考勤系统进不去请处理谢谢! 考勤系统进不去请处理谢谢! spgdcvhb ocagnpmj GRP_30
1461 erp working slow \n\nreceived from: rayhtukumujar.br2@company.c... fgdsvija qvixmubh GRP_14
1472 order shipped ups ground \r\n\r\nfrom: tjlgzkbp iervwjzg \r\nsent: thur... tjlgzkbp iervwjzg GRP_18
1485 erp password reset for user kambthryes2 ( ex... please reset erp - password of kambthryes2 ( e... mrwucjho tjeaxnhu GRP_0
1512 erp inbox, ich bin nicht im workcenter nr. 511... workflow meiner gruppe für mich nicht sichtba... xsnrlygw nmqyraec GRP_13
1532 i need accsess to this link. \r\n\r\nreceived from: rcmziuob.xhblozqe@gmail... rcmziuob xhblozqe GRP_0
1548 drucker em93 macht willkürlich flecken auf de... drucker em93 macht willkürlich flecken auf de... ltxzfcgm sxvigclz GRP_42
1550 computer check \n\nreceived from: qhyoiwls.uynrhiva@gmail.com... qhyoiwls uynrhiva GRP_28
1554 答复: ticket_no1559458 : wireless guest acce... \n\nreceived from: krnzfoct.mnjbuedp@gmail.com... krnzfoct mnjbuedp GRP_0
1565 datenübertragung nicht möglich! keine datenübertragung von pc auf maschine mÃ... ajiqfrkz dolmpkqf GRP_42
1568 电脑无法打开网页。 张工您好!\r\n我的电脑连接公司网... tmufgokq qtzavows GRP_31
1575 pricing condition issue in oa \r\n\r\nreceived from: rayhtukumujar.br2@compa... fgdsvija qvixmubh GRP_13
1577 the printer is defaulting to the usa printer f... from: kryuisti turleythy \nsent: wednesday, oc... hybiaxlk lawptzir GRP_18
1608 wifi \n\nreceived from: nbdljruw.axcrspyh@gmail.com... nbdljruw axcrspyh GRP_0
1622 nachdem ich outlook geöffnet habe und eine e-... mehr machen. bitte dringend um hilfe. meine mo... byltiakh vinqkxzm GRP_0
1630 druker \r\n\r\nreceived from: xahuklgm.dqvkfjlb@gmail... xahuklgm dqvkfjlb GRP_33
1642 basis on-call / shift details... \r\n\r\nreceived from: mnxbeuso.rfmdlwuo@gmail... mnxbeuso rfmdlwuo GRP_0
1648 druckerprobleme mit em021 in germany \r\n\r\nreceived from: mobaidfx.gviwlsrm@gmail... mobaidfx gviwlsrm GRP_42
1654 telephony_software via remote number \n\nreceived from: gmwdvrou.aupnvems@gmail.com... gmwdvrou aupnvems GRP_7
1657 bitte einen ordner im teams laufwerk fürth an... \n\nreceived from: osjqfbvw.hlmgrfpx@gmail.com... osjqfbvw hlmgrfpx GRP_12
1661 owner of the group "k-bhty-plc4-yhhm-er" is no... owner of the group "k-bhty-plc4-yhhm-er" is no... tcbonyes gpfacron GRP_0
1664 maschinen pc r239 ohne funktion -> bitte prüfen maschinen pc r239 ohne funktion -> bitte prüfen ljxzyriq zqxkrcev GRP_33
1665 faxen von purchase orders direkt aus der po sc... faxen von purchase orders direkt aus der po sc... rnajgdmb fioznltc GRP_29
1666 rechner für erodiermaschine defekt \youfzmgp ... rechner für erodiermaschine defekt \youfzmgp ... youfzmgp xvysrnmb GRP_24
1667 reset the password for xbsckemt durnfyxb on so... hallo,\r\nbitte nur das passwort für "apprica... xbsckemt durnfyxb GRP_0
1669 in outlook ,i am not getting pauhtul.phillyhui... in outlook ,i am not getting pauhtul.phillyhui... jacgtfxo vlbeuxif GRP_0
1678 probleme mit skype \r\n\r\nreceived from: fmhlugqk.dpraethi@gmail... fmhlugqk dpraethi GRP_0
1700 账户被锁定 用户忘记密码,导致账户锁定 neovalui kabpfvic GRP_48
1701 输入用户名和密码后显示出错 登录时输入用户名和密码后,显示ç... ofiglcaq hvcqtzel GRP_48
1704 账户被锁定 用户账户锁定,请求解锁 yvscpgax wdfxytzu GRP_48
1710 产品与仓库对不上 产品所在仓库出错。 st6p -100/+325 ... tvykmlex hcawjigf GRP_48
1711 开机黑屏 电脑启动后黑屏,主机无报警 ofiglcaq hvcqtzel GRP_48
1712 outlook重复要求输入密码 outlook密码框反复弹出,要求输入密... jtplaoui uvsihfpn GRP_48
1736 die synchronisierung mit exchange activesync i... from: lhejbwkc xbmyvnqf \r\nsent: tuesday, oct... lhejbwkc xbmyvnqf GRP_0
1737 defect scanner/printer "vh42-plant manager/con... from: byclpwmv esafrtbh \nsent: tuesday, octob... byclpwmv esafrtbh GRP_33
1739 is there cert open for the usa telephone syste... \r\n\r\nreceived from: oxlqvika.zrvbahym@gmail... oxlqvika zrvbahym GRP_37
1740 password reset from: microsoft on behalf of company inc. [mai... wjdatzyv bhkoldjv GRP_0
1755 i̇lt: outlook is unable to connect \n\nreceived from: gacfhedw.iqustfzh@gmail.com... gacfhedw iqustfzh GRP_0
1757 please provide ip for our 5 users 发件人: white, ben <ben.white@hr_tool.com>\... krnzfoct mnjbuedp GRP_56
1760 vip 1 sending email on behalf offinance_vip1 \n\nreceived from: lpoebzsc.grknswyo@gmail.com... lpoebzsc grknswyo GRP_26
1766 handscanner an pc evhw8114203 bei r208 funktio... handscanner an pc evhw8114203 bei r208 funktio... ljxzyriq zqxkrcev GRP_33
1772 neuen ordner im info von EU_tool anlegen (plea... \r\n\r\nreceived from: qavdrpfu.ylfwnbkr@gmail... qavdrpfu ylfwnbkr GRP_25
1774 replizieren \r\n\r\nreceived from: trgqbeax.hfyzudql@gmail... trgqbeax hfyzudql GRP_0
1784 einstellungen am alfa set messgerät überprü... am rollomatic alfa set messgerät müssen die ... hwfckjzs abxdmyho GRP_33
1785 reset the password for eglavnhx uprodleq on er... bitte passwort für benutzer "franhtyuj1" zurÃ... eglavnhx uprodleq GRP_0
1790 passwordproblems \n\nreceived from: agjzikpf.nhfrbxek@gmail.com... agjzikpf nhfrbxek GRP_0
1805 wifi 不能在线 wifi 不能在线 qnvkwalx dfjtxigl GRP_30
1807 无法创建skype会议,outlook 日历上面... 无法创建skype会议,outlook 日历上面... dqovxreg qswvlctg GRP_31
1808 我的outlook 打印邮件设置纵向,打å‡... 我的outlook 打印邮件设置纵向,打å‡... dizquolf hlykecxa GRP_31
1813 need ticket \r\n\r\nreceived from: dqplrwoy.cutpwjie@gmail... dqplrwoy cutpwjie GRP_6
1815 password reset from: microsoft on behalf of company inc. [mai... xbfcitlh ntulmcpq GRP_0
1829 ich kann meinen vpn nicht öffnen ich kann meinen vpn nicht öffnen anivdcor rbmfhiox GRP_0
1854 us_plant (plant_269) - servers not connecting \r\n\r\nreceived from: dpuifqeo.eglwsfkn@gmail... dpuifqeo eglwsfkn GRP_12
1855 printer problem / issue information -- zebra l... please complete all required questions below. ... okfmbqur efzukjsa GRP_0
1869 archiving_tool client log file \r\n\r\nreceived from: qjtbrvfy.avwqmhsp@gmail... qjtbrvfy avwqmhsp GRP_0
1872 rücksetzung der passwörter für accounts vvw... rücksetzung der passwörter für accounts vvw... yvjdluhk hmcpvtdj GRP_0
1885 automatische anmeldung wenn ich outlook aufrufe, will der rechner gle... dhraxivp enmfvuqb GRP_0
1899 probleme mit we110 \die druckerwarteschlange w... probleme mit we110 \die druckerwarteschlange w... fdyietau dvsyxwbu GRP_24
1914 skype läd auf dem pc nicht. skype wird nicht vom browser geladen auch nich... qpbwfvdm ytlnodrv GRP_0
1918 no vpn access - pls help \n\nreceived from: luagmhds.iymwcelx@gmail.com... luagmhds iymwcelx GRP_0
1935 msd crm--outlook 一直显示正在启动,ä¸... please provide details of the issue.\r\noutloo... loesgbfh tknsuhvw GRP_31
1938 vpn not working for rjeyfxlg ltfskygw 219581173[‎10/‎10/‎2016 5:01 am] lzspyjk... lzspyjki smdbqnef GRP_0
1954 笔记本重新装下系统 把我的笔记本重新装下系统 xjvubmlq vyamhjip GRP_30
1955 有一个链接文件打不开 有一链接文件打不开,提示版本低 qsfcxzel quwykhno GRP_30
1966 password resset request from: rakthyesh ramdntythanjesh \nsent: sunday... hgudmrta vidzoqfl GRP_0
1975 mobile device activation from: ftnijxup sbltduco \r\nsent: saturday, oc... ftnijxup sbltduco GRP_0
1981 telephone repair pu-4 (shop floor 173 connect... \n\nreceived from: segvwfyn.mogtrevn@gmail.com... segvwfyn mogtrevn GRP_19
1984 supply_chain_software password reset supply_chain_software password reset\n\nfrom: ... qbzlfeva fmxwqugs GRP_0
1990 ms outlook2013 一直显示正在启动,不è... please provide details of the issue.\r\nms out... molihtdq auprogsj GRP_31
2003 答复: password_management_tool \n\nreceived from: weqocbnu.eoixcpvy@gmail.com... weqocbnu eoixcpvy GRP_0
2004 电脑登录密码忘记,重置密码。 电脑登录密码忘记,重置密码。 weqocbnu eoixcpvy GRP_31
2036 passwort geoyhurg chriuimjiann \n\nreceived from: nsoikcyf.jhybqael@gmail.com... nsoikcyf jhybqael GRP_0
2082 printing language sa38 (reporting rfumsv00) please complete all required questions below. ... ojhiaubp lovgirtm GRP_33
2085 benötige eine zahlenblock-tastatur für linke... hallo .\n\nbenötige eine zahlenblock-tastatur... wtgbdjzl coliybmq GRP_24
2086 EU_tool läuft sehr langsam im werk germany. r... aktuell können keine rückmeldungen in EU_too... lpnzjimy mwtvondq GRP_25
2087 stocktransfer goes not mm 5112324 \r\n\r\nreceived from: mgahlpwx.jwtfpaxh@gmail... mgahlpwx jwtfpaxh GRP_6
2102 need access \n\nreceived from: tcbonyes.gpfacron@gmail.com... tcbonyes gpfacron GRP_0
2108 i cannot extract finance_app data - pls resolv... \n\nreceived from: uagqromi.sqgtkmci@gmail.com... uagqromi sqgtkmci GRP_55
2144 passwort geoyhurg chriuimjiann \r\n\r\nreceived from: nsoikcyf.jhybqael@gmail... nsoikcyf jhybqael GRP_0
2155 gflewxmn qnxhoryg (term date 8/31/2016) - netw... i never received access to tghkris wickhamtf's... vpksyfco chosuygq GRP_12
2156 probleme mit com port \maschine stöhrmann \ea... probleme mit com port \maschine stöhrmann \ea... eaodcgsw trmzwbyc GRP_24
2162 passwort geoyhurg chriuimjiann \r\n\r\nreceived from: nsoikcyf.jhybqael@gmail... nsoikcyf jhybqael GRP_0
2184 crm - screen - advanced find- create view \r\n\r\nreceived from: ctzykflo.evzbhgru@gmail... ctzykflo evzbhgru GRP_0
2187 monitor vom rfa-analysegerät defekt hallo ,\n\nkannst du mal bei uns im labor vorb... rclqfpgt tbnovxdp GRP_24
2260 probleme mit pf3 port 13 \r\n\r\nreceived from: ughzilfm.cfibdamq@gmail... ughzilfm cfibdamq GRP_24
2263 system disk c: of server HostName_698 is full ... \r\n\r\nreceived from: fbyusmxz.kxvmcbly@gmail... fbyusmxz kxvmcbly GRP_12
2266 the report none of the lookup/sort features ar... i have access to the report and reporting_tool... wszbxlpu dsujiozp GRP_9
2282 wk38 -> qdxyifhj zbwtunpy halo ,\nder drucker bereitet problem mit dem d... qmwhlnev ixtmkwdc GRP_33
2288 russia: interface: vlan1 · russia engineering... russia: interface: vlan1 · russia engineerin... rkupnshb gsmzfojw GRP_8
2294 login is not possible ?? \n\nreceived from: lgeuniqf.ijsnyxgf@gmail.com... lgeuniqf ijsnyxgf GRP_0
2307 probleme beim zugriff auf zeichnungen - netwea... \n\nreceived from: ecoljnvt.lbdqmvfs@gmail.com... ecoljnvt lbdqmvfs GRP_0
2313 interface: fastethernet0/2 · timeclock on clh... interface: fastethernet0/2 · timeclock on clh... dkmcfreg anwmfvlg GRP_8
2316 can't access erp by vpn \n\nreceived from: ovhtgsxd.dcqhnrmy@gmail.com... ovhtgsxd dcqhnrmy GRP_0
2349 can you please check – marftgytin höpfner (... can you please check – marftgytin höpfner (... karoyclq ivlabkhu GRP_22
2353 locked out of poruxnwb yfaqhceo received from: hwbukcsm.hwobikcv@gmail.com\r\n... hwbukcsm hwobikcv GRP_1
2381 password reset request to reset user's password \n th... apokrfjv mdiepcul GRP_0
2383 bluescreen ewew8323733 \wrcktgbd wzrgyunp hallo ,\r\n\r\nmein pc zeigt blaue seite mit v... wrcktgbd wzrgyunp GRP_24
2386 password reset \n request to reset user's password \n ... icyxtqej lqsjrgzt GRP_0
2403 plm-engineering tool: pdf-files can not be op... \r\n\r\nreceived from: tgafnyzb.hnevrcuj@gmail... tgafnyzb hnevrcuj GRP_14
2404 info type '0017' is missing to personal number... from: qkmvosen opundxsk \nsent: tuesday, octob... qkmvosen opundxsk GRP_10
2419 crm- extensions - forecast view \r\n\r\nreceived from: ctzykflo.evzbhgru@gmail... ctzykflo evzbhgru GRP_40
2420 app probleme \r\n\r\nreceived from: ecoljnvt.lbdqmvfs@gmail... ecoljnvt lbdqmvfs GRP_0
2422 unable to do ethics course. have not received ... mr. kothyherr has not received any mail from e... ecoljnvt lbdqmvfs GRP_23
2432 problems with wifi \r\n\r\nreceived from: hpeknoam.yrfowmva@gmail... hpeknoam yrfowmva GRP_0
2439 do i have to worry about this? \r\n\r\nreceived from: hbmwlprq.ilfvyodx@gmail... hbmwlprq ilfvyodx GRP_0
2443 request to reset microsoft online services pas... request to reset user's password \n the... oebrjdqc nhuqmskw GRP_0
2445 vh 27 - werk germany - fehlende druckaufträge... \r\nbei drucker vh 27 keine ausgabe der drucka... ucawbivs ountxzir GRP_0
2467 urgent help required-outlook to crm mfg_toolti... \r\n\r\nreceived from: fjohugzb.fhagjskd@gmail... fjohugzb fhagjskd GRP_0
2492 printing request - request transaction print t... please complete all required questions below. ... omiwzbue auvolfhp GRP_45
2525 cad team code - characteristic can you please add "manuf eng – us_plant" ?\r\n ctvaejbo mjcerqwo GRP_11
2538 passwords to be reset for erp SID_34 \r\n\r\nreceived from: dpuifqeo.eglwsfkn@gmail... dpuifqeo eglwsfkn GRP_0
2551 erpsys- certificate errors \n\nreceived from: qgrbnjiu.hidzlfma@gmail.com... hgcrtxez azoeingw GRP_14
2611 reisekostenabrechnung in erp nicht möglich! \r\n\r\nreceived from: byclpwmv.esafrtbh@gmail... byclpwmv esafrtbh GRP_10
2697 anmelden bei outlook nicht möglich anmelden bei outlook seit passwort ändern nic... cobdhkmj bikjecaz GRP_42
2708 hello please urgently fix! mwst error。ã€... hello\r\n\r\nplease urgently fix!\r\n\r\nmwstã... uxndyfrs vahxnfgl GRP_13
2709 probleme mit drucken \vermessungsmaschine für... probleme mit drucken \vermessungsmaschine für... eaodcgsw trmzwbyc GRP_24
2710 support für alicona \jionmpsf wnkpzcmv support für alicona \jionmpsf wnkpzcmv jionmpsf wnkpzcmv GRP_24
2721 drucker em26 druckt mit bis zu 2 std. verzöge... drucker em26 druckt mit bis zu 2 std. verzöge... djilqgmw bidchqsg GRP_42
2722 info type '0017' is missing to personal number... from: lmwohkbd ucziatex \nsent: friday, septem... lmwohkbd ucziatex GRP_10
2730 unser kopierer we 95 (hp laserjet) zeigt fehle... unser kopierer we 95 (hp laserjet) zeigt fehle... fdyietau dvsyxwbu GRP_24
2736 vpn安装-转贺正平 \n\nreceived from: tuqrvowp.fxmzkvqo@gmail.com... tuqrvowp fxmzkvqo GRP_30
2755 top urgent! //price issue \n\nreceived from: wktesmbp.lorjymef@gmail.com... wktesmbp lorjymef GRP_29
2759 india plant - india (): node company-ap-ind-kk... india plant - india (): node company-ap-ind-kk... jyoqwxhz clhxsoqy GRP_8
2795 skype problem \r\n\r\nreceived from: pcjtisrv.havyuwds@gmail... pcjtisrv havyuwds GRP_0
2798 can you please remove my email from this maili... can you please remove my email from this maili... btvmxdfc yfahetsc GRP_0
2812 chrthryui stavenheim : unbale to login to tess... from: oinqckds qieswrfu \nsent: thursday, sept... oinqckds qieswrfu GRP_12
2832 ordnerfreigabe für m: kvp3 ordnerfreigabe für m: kvp3 nemzycxb xpsgkahw GRP_34
2838 ich benötige hilfe bei der passwortänderung ... ich benötige hilfe bei der passwortänderung ... ecoljnvt lbdqmvfs GRP_0
2845 purchasing catalogue de-wollschläger needs im... the company is bankruped rhinvtua aquyjfbs GRP_29
2853 reset the password for bzwrchnd ysfiwvmo on wi... reset the password for bzwrchnd ysfiwvmo on wi... bzwrchnd ysfiwvmo GRP_0
2865 vpn 不能登录。 [‎2016/‎9/‎29 9:14] daisy huang: \r\nhi,... lzaqjxgi lzfycegm GRP_31
2866 usa - (company) : interface fastethernet0/16 &... interface: fastethernet0/16 · asheshopsw5 on ... jyoqwxhz clhxsoqy GRP_8
2879 mobile device activation from: tsbnfixp numwqahj \nsent: wednesday, sep... tsbnfixp numwqahj GRP_0
2892 the termination action for manuel zuehlke has ... hello , \r\n\r\na kündigung for manuel zuehlk... clyauqjw cxwrsflt GRP_2
2909 missing travel privileges \n\nreceived from: uyrpdvoq.mbzevtcx@gmail.com... uyrpdvoq mbzevtcx GRP_10
2946 lese und schreibberechtigung für HostName_753... lese und schreibberechtigung für HostName_753... veyduami hjxpwqbi GRP_34
2952 zlz agreements 4111244546 + 4111337261 don´... \r\n\r\nreceived from: qbewrpfu.lwibmxzo@gmail... qbewrpfu lwibmxzo GRP_13
2954 drucker vh77 und vh79 funktionieren nicht. tic... \r\n • printer name / make - model? (ex ... tcepzdhi ymbirlod GRP_33
2958 pc eemw8144241 einrichten damit der barcode an... pc eemw8144241 einrichten damit der barcode an... djilqgmw bidchqsg GRP_42
2960 ordnerfreigabe für m: kvp3 und ce_leiter ordnerfreigabe für m: kvp3 und ce_leiter nemzycxb xpsgkahw GRP_34
2968 viewer for step files \r\n\r\nreceived from: azyfsrqh.wkavqigu@gmail... azyfsrqh wkavqigu GRP_0
2975 ??????????????????? \r\n\r\nreceived from: yzbjhmpw.vzrulkog@gmail... yzbjhmpw vzrulkog GRP_0
2980 hp2热压炉数据传输卡,数据更新不å... hp2热压炉数据传输卡,数据更新不å... basqoyjx frvwhbse GRP_30
2986 passwort frau koburvmc jwzlebap #51117 \r\n\r\nreceived from: mobaidfx.gviwlsrm@gmail... mobaidfx gviwlsrm GRP_0
2991 erp access issue p 11, passwort zurück setzen system (SID_34, SID_37, SID_39, SID_38, hrp, o... ukqoiswv unjarfoq GRP_2
2993 outlook - indizierung ich kann seit ca. 1 woche meine suche im outlo... qjsydrfo oeicyswt GRP_0
3007 win8.1 系统提示更新后,所有应用软... win8.1 系统提示更新后,所有应用软... bxtdalsj rnkvcshb GRP_31
3010 用友-库存帐和现存量不符 用友-库存帐和现存量不符,产品编... qypsxviu xagqtblv GRP_48
3040 dealer open orders are appearing in vkm4 hi,\n\nplease raise it ticket. all dealer open... miecoszw mhvbnodw GRP_10
3042 pr - workflow from: grhryueg dewicrth \nsent: tuesday, septe... kehtxprg uekapfzt GRP_29
3081 printer problem / issue information please complete all required questions below. ... owhuxbnf sxbgyrou GRP_0
3083 termination for user lisfgta geitrhybler termination for user lisfgta geitrhybler\n\n\n... mobaidfx gviwlsrm GRP_2
3086 employee termination foulgnmdia pgsqwrumh langmar hello , \n\na kündigung foulgnmdia pgsqwrumh ... mobaidfx gviwlsrm GRP_2
3088 probleme bei der projekt eingabe im collaborat... dear it,\r\n\r\nstill some issues with the tra... difozlav dgbfptos GRP_0
3106 urgent // price update error \r\n\r\nreceived from: uarnkqps.gufcjxma@gmail... uarnkqps gufcjxma GRP_13
3109 finance_app \r\n\r\nreceived from: bujiesrg.zopcrshl@gmail... bujiesrg zopcrshl GRP_55
3110 password for w-lan anrgtdy bofffgtyin \n\nreceived from: dubpgacz.kjzhilng@gmail.com... dubpgacz kjzhilng GRP_0
3120 电脑硬盘故障,请求维修。 电脑硬盘故障,请求维修。 ruhbyzpv vlksnjti GRP_30
3133 hr_tool-etime 系统不能登录,一直显ç¤... hr_tool-etime 系统不能登录,一直显ç¤... qekyowtv qdjixvkh GRP_31
3137 邮箱无法启动 邮箱无法启动,提示无法创新新的å... bzypjigs qokwvgyn GRP_30
3150 alrthyu s lowe. my ee# is 6045304.it needs to ... from: scthyott lortwe \nsent: monday, septembe... sbvlxuwm yanbikrx GRP_9
3162 cópia de conta telefonica solicito fornecer cópia da conta telefônica ... dughqrnf mebofqhz GRP_62
3168 the kündigung action for pfgia scgtitt has co... hello , \n\na kündigung for pfgia scgtitt, 11... vmthcrkf iceyusnd GRP_2
3169 mii logging in under different user issue reported from usa, usa and usa\n\nnote f... entuakhp xrnhtdmk GRP_41
3174 partner rtr team - email restrictions \n\nreceived from: ntsowaem.jfgslyde@gmail.com... ntsowaem jfgslyde GRP_26
3224 archivierung von e-mails \n\nreceived from: anivdcor.rbmfhiox@gmail.com... anivdcor rbmfhiox GRP_0
3236 uacyltoe hxgaycze message hallo liebe kollegen,\r\n\r\nwarum habe ich di... fmhlugqk dpraethi GRP_0
3250 network printer – wy85 issue – no print out \r\n\r\nreceived from: rjanhbde.owfkyjcp@gmail... rjanhbde owfkyjcp GRP_0
3259 reinstall company barcode für ewew8323753 \zl... reinstall company barcode für ewew8323753 \zl... zlqfptjx xnklbfua GRP_24
3275 problems with approval in universal work list \n\nreceived from: dkinobsv.wymgzcrh@gmail.com... dkinobsv wymgzcrh GRP_2
3279 support für rechner messvorrichtung \jionmp... support für rechner messvorrichtung \jionmp... jionmpsf wnkpzcmv GRP_24
3282 rechner für viotto funktioniert nicht rechner für viotto funktioniert nicht jionmpsf wnkpzcmv GRP_24
3295 delivery note creation request \r\n\r\nreceived from: fbvpcytz.nokypgvx@gmail... fbvpcytz nokypgvx GRP_6
3297 outlook - keine rückmeldung outlook\r\ni'm not able to log-on to outlook t... atlwdyej vtlhzbix GRP_0
3300 outlook无法登陆 ,一直显示正在启动。 outlook无法登陆 ,一直显示正在启动。 eyoqnkjl gakhdebf GRP_30
3303 百度搜索后-新网页打不开 \n\nreceived from: ktghvuwr.uwtakcmj@gmail.com... ktghvuwr uwtakcmj GRP_31
3305 anzeigen der bestellübersicht im erp netweave... im erp netweaver portal ist es nicht mehr mög... dnjxilqu povreyhq GRP_0
3307 erp系统登录后,采购页面无法显示"... erp系统登录后,采购页面无法显示"... lszhkxoy blemoyjd GRP_30
3315 电脑系统启动蓝屏。 电脑系统启动蓝屏。水不小心洒到ç... hdungfsc znuhyjkx GRP_31
3344 request to reset microsoft online services pas... from: microsoft on behalf of company inc. [mai... dcaokyph vrdnocxs GRP_0
3382 support with \n\nreceived from: jogtse.mhytusa@company.com\... kwyozxgd gasxctph GRP_25
3397 interface: fastethernet0/6 · company-johthryu... interface: fastethernet0/6 · company-johthryu... jloygrwh acvztedi GRP_4
3407 usa village - clappdico: company-na-usa-clapp... usa village - clappdico: company-na-usa-clapp... oldrctiu bxurpsyi GRP_8
3424 rechner ewewx212455 ( olympus ) kein zugriff a... rechner ewewx212455 ( olympus ) kein zugriff a... ughzilfm cfibdamq GRP_24
3431 schreib- und leseberechtigung auf den ordner ... hallo ,\r\n\r\nbitte schreib- und leseberechti... htvepyua izgulrcf GRP_24
3433 info type '0017' missing to personal number 56... from: fbgetczn jlsvxura \r\nsent: friday, sept... fbgetczn jlsvxura GRP_10
3439 erp pur - wrong subcontracting demand 2nd mate... hello,\r\nwith component 2433384 we see a wron... tckyrinp vbzqslco GRP_29
3444 keine netzwerkverbindung für rechner vhw7020 bitte netzwerkverbindung für rechner vhw7020 ... wogicpdt jbdyzmhf GRP_33
3446 account "helftgyldt" gesperrt anmeldung bei account "helftgyldt" nicht mögl... kbcedtiq jxnzpgwe GRP_0
3450 account "jncvkrzm thjquiyl" gesperrt anmeldung bei account "jncvkrzm thjquiyl" nich... kbcedtiq jxnzpgwe GRP_0
3453 account lock release request of "nakagtwsgs" hi team,\n\nplease unlock the windows account ... gworzkhc inypbucg GRP_0
3457 password reset \n\nreceived from: bctypmjw.cbhnxafz@gmail.com... bctypmjw cbhnxafz GRP_0
3479 request to reset microsoft online services pas... from: microsoft on behalf of company inc. [mai... vmdwslkj exvcknbp GRP_0
3481 quoting engine \n\nreceived from: iauqlrjk.nijdaukz@gmail.com... iauqlrjk nijdaukz GRP_14
3489 auf den totmannhandys ist eine störung. auf den totmannhandys ist eine störung.\r\n" ... tfrbwoua aegpkruc GRP_42
3511 erp vc - configair application in SID_1 uacylt... configair server in uacyltoe hxgaycze environm... iavozegx jpcudyfi GRP_14
3544 auflösung am bildschirm von pc eemw 8143337 s... auflösung am bildschirm von pc eemw 8143337 s... egklxsoy hivwzjcf GRP_42
3551 printer problem / issue information please complete all required questions below. ... ploxzuts utvimnwo GRP_0
3572 probleme mit outlook \kwehgxts agdsqbwv hallo ,\r\n\r\nich möchte gerne das in der su... kwehgxts agdsqbwv GRP_24
3578 your account hello,\r\n\r\ni'm unable to login on the bcd t... anuxbyzg bvsqcjkw GRP_0
3581 usa - (company inc.):gigabitethernet2/0/49 -in... usa - (company inc.): interface: gigabitethern... oldrctiu bxurpsyi GRP_4
3582 erp - SID_34 - long response time + long runtime several users in fürth including me are exper... cfajzero vlygoksi GRP_6
3585 EU_tool problem - urgent- \r\n\r\nreceived from: xnlapdeq.wupaeqlv@gmail... xnlapdeq wupaeqlv GRP_25
3590 full access to oe drive fürth \r\n\r\nreceived from: scjxobhd.ldypjkmf@gmail... scjxobhd ldypjkmf GRP_34
3594 bitte erstellen sie mir eine liste über alle ... bitte erstellen sie mir eine liste über alle ... ukvlnrwb fiyeczmh GRP_0
3603 apac company:fastethernet0/48 - company-ap-chn... apac company: company-ap-chn-apac-shop-closet... oldrctiu bxurpsyi GRP_4
3631 usa:interface down on tengigabitethernet1/5 te... interface down - on tengigabitethernet1/5 ·... oldrctiu bxurpsyi GRP_4
3633 berechtigung für ordner einrichten ich benötige für folgende personen aus germa... xtfniscy ecoyksda GRP_33
3645 答复: 答复: order products online problem \r\n\r\nreceived from: fkdazsmi.yecbrofv@gmail... fkdazsmi yecbrofv GRP_0
3660 please activate my new company owned samsung s... \r\n\r\nreceived from: nwfoucba.dzbujamc@gmail... nwfoucba dzbujamc GRP_0
3666 infopath links to discount forms do not open hello,\r\ndiscount team in poznań is unable t... ahydmrbu fjymgtvo GRP_0
3675 due to new hardware usa access to exchange acc... die synchronisierung mit exchange activesync i... wjbanovh mohgutiw GRP_0
3680 monitor in der stäbe-endkontrolle defekt monitor in der stäbe-endkontrolle defekt kiqrvwat gwkpxzyt GRP_33
3681 user password \r\n\r\nreceived from: bswlorek.yhdrlgbs@gmail... bswlorek yhdrlgbs GRP_0
3684 maus defekt . hallo ,\r\nich und howfanzi siavgtby brauchen... tkhaymqg cwuqzyvm GRP_24
3685 maus defekt hallo ,\r\nich und howfanzi siavgtby brauchen... howfanzi siavgtby GRP_24
3686 probleme mit symantec \ konferenzraum stäbe \... probleme mit symantec \ konferenzraum stäbe \... nozahtbr ubznqpsy GRP_24
3703 order products online problem from: -ebusiness service \nsent: wednesday, se... fkdazsmi yecbrofv GRP_0
3704 erp无法登录,提示logon balancing error ... gartryhu:\r\n你好,从昨天晚上开始ï... agzswjku kqwofdjl GRP_31
3713 e-time 系统java更新-转贺正平 \n\nreceived from: tuqrvowp.fxmzkvqo@gmail.com... tuqrvowp fxmzkvqo GRP_30
3715 maschinenstillstand ma 7 kesm , fernwartung no... maschinenstillstand ma 7 kesm , fernwartung no... fiwaltqr utykjmwi GRP_33
3721 leantracker anmeldung funktioniert nicht lean tracker öffnet nicht. fehlermeldung ersc... hwfckjzs abxdmyho GRP_0
3730 request to reset microsoft online services pas... from: microsoft on behalf of company inc. [mai... afijkocw rjtxslpa GRP_0
3738 电话故障 铸棒车间电话故障,39523850 sbkhjigv pbvlfcse GRP_48
3740 ups故障 制粉车间3楼psf交换机处ups故障,设... agyvbnwz mxsonkdc GRP_48
3748 telefon defekt gigaset m2 mit der durchwahl 479\r\nakku hält... ptvdxwla tlevwmzo GRP_33
3768 password reset from: microsoft on behalf of company inc. [mai... goyvcped sxbgiajh GRP_0
3803 vip 2: collaboration_platform access problem \n\nreceived from: kzbuhixt.zjdmoahr@gmail.com... kzbuhixt zjdmoahr GRP_0
3820 account auf iphone löschen und neu anlegen. da durch den zugriff per iphone auf den mail a... jxphgfmb gjbtuwek GRP_24
3821 passwort muß nochmals zurückgestzt werden. vermutlich hat der zugriff mit dem iphone das ... jxphgfmb gjbtuwek GRP_24
3832 bitte im laufwerk germany unter m:\berechnungs... bitte im laufwerk germany unter m:\berechnungs... rphoiduv hfbevpir GRP_12
3837 zugriff auf verlauf m:\skv-alicona. hallo ,\n\nfolgende mitarbeiter benötigen zug... hqyfebtd pkmyrdga GRP_24
3840 drucker em26 geht nicht mehr drucker em26 geht nicht mehr trotz mehrere dru... djilqgmw bidchqsg GRP_42
3845 brauche freigabe für m:\skv-alicona. brauche freigabe für m:\skv-alicona. /hgrvubz... hgrvubzo wgyhktic GRP_12
3848 brauche freigabe für m:\skv-alicona. brauche freigabe für m:\skv-alicona. /lxfnwyu... lxfnwyuv bqmjyprz GRP_12
3849 bitte freigabe für m:\skv-alicona einrichten. bitte freigabe für m:\skv-alicona einrichten.... nfybpxdg yjtdkfuo GRP_12
3850 bitte freigabe für m:\skv-alicona einrichten. bitte freigabe für m:\skv-alicona einrichten.... rhaycqjg arcgonvy GRP_12
3859 telefon defekt. gigaset charger for sl3 profes... ft584095 octophon sl3 prof. ladeschale mnr 513... txkgmwbc qohmgwrp GRP_33
3867 passwort ist abgelaufen. bitte reset einleiten... passwort ist abgelaufen. bitte reset einleiten... jxphgfmb gjbtuwek GRP_24
3870 erp business_client 不能使用,更新msdot... 更新msdotnet_461_business_client_50_SID_792 . okebwncv zyxvwkpn GRP_31
3874 i can't reiceve e-mail form my mobile phone. could you reset my company mobile phone. detai... wmrozfpt ltgcofqp GRP_0
3903 电脑无法连接公共盘,请帮我转给å... 电脑无法连接公共盘,请帮我转给å... basqoyjx frvwhbse GRP_30
3944 crm add-in / outlook \r\n\r\nreceived from: mdbegvct.dbvichlg@gmail... mdbegvct dbvichlg GRP_0
3957 sandplant_162 - fc9/15 · drlab HostName_1... sandplant_162 - fc9/15 · drlab HostName_1... rkupnshb gsmzfojw GRP_8
3966 user switzerlandim blocked in netweaver \r\n\r\nreceived from: vogtfyne.isugmpcn@gmail... xeoycjsu iuasmloj GRP_0
3971 connection issues with secomea site manager well i have fighting with this all week. our s... sxzuctga qnwhyapd GRP_4
3987 keine anmeldung auf skype möglich keine anmeldung auf skype möglich /vzqomdgt j... vzqomdgt jwoqbuml GRP_24
3997 tonerpatrone em01 / germany alle \r\n\r\nreceived from: yzodcxkn.zyewibop@gmail... yzodcxkn zyewibop GRP_42
4027 connection to finance_app data base \r\n\r\nreceived from: naisdxtk.mqzvewsb@gmail... naisdxtk mqzvewsb GRP_55
4028 传真机不清楚 to 小贺:传真机传真出来有很粗的é... xqyjztnm onfusvlz GRP_30
4035 der mitarbeiter schrenfgker heinrifgtch (pn 50... der mitarbeiter schrenfgker heinrifgtch (pn 50... mdkbjzrt qaxhznvy GRP_25
4039 unable to create stock recall form \r\n\r\nreceived from: rjanhbde.owfkyjcp@gmail... rjanhbde owfkyjcp GRP_0
4040 eeml8143294 / wlan lässt sich nicht per knopf... eeml8143294 / wlan lässt sich nicht per knopf... mvwiygou rpkscnlv GRP_42
4042 vpn 不能连接 vpn 不能连接 fcvqezmn frzyjknu GRP_31
4046 登录进去后无法新建客户信息,无æ... ~connect_ui/setting/view?brand=kd&language=zh-... pfiyvdea uwbdsfmr GRP_31
4054 ie 浏览器问题,搜索网页后不能打开。 ie 浏览器问题,搜索网页后不能打开。 napijrez xhpqkojc GRP_31
4059 circuit outage:·2 mbps internet link to telec... what type of outage: _____network __x___c... jyoqwxhz clhxsoqy GRP_8
4074 ooo until 30.9.2016 : engineering_tool \r\n\r\nreceived from: wsczgfal.hjfklsdg@gmail... wsczgfal hjfklsdg GRP_0
4082 apac company: fastethernet0/48 · uplink to c... apac company: company-ap-chn-apac-shop-closet... oldrctiu bxurpsyi GRP_4
4090 热压炉4号plc控制器通讯模块网络地... 今年发生了5次网络地址丢失,需要... pwfirvgu ydwlhuzi GRP_30
4091 reset passwords for hgyvopct dhckfmbq using pa... 请将密码设置为:sdguo1609 hgyvopct dhckfmbq GRP_17
4098 电脑意外进水,帮助处理!请交小è... 电脑意外进水,帮助处理!请交小è... pvfclkmn gebyipwr GRP_30
4100 erp logon \n\nreceived from: rujpckto.lhutkpxm@gmail.com... rujpckto lhutkpxm GRP_0
4117 fwd: die synchronisierung mit exchange actives... \r\n\r\nreceived from: nsoikcyf.jhybqael@gmail... nsoikcyf jhybqael GRP_0
4123 kein internetsignal from: itjzudor ybtmorxp \nsent: saturday, sept... itjzudor ybtmorxp GRP_0
4150 laptop _ screen problem. \n\nreceived from: rohjghit.kumghtwar@company.... tgseqfni ehlabdtf GRP_19
4168 solicito a instalação de software adobe acrobat solicito a instalação do software adobe acro... vhjkdqop tkhafgrc GRP_62
4170 vendor and customer balance in local currency ... help desk,\r\n\r\nin fy2017, the end of month ... zsqabokr xbtsaodr GRP_10
4190 an der maschine 15 (agathon combi) funktionier... hallo \r\n\r\nan der maschine 15 (agathon comb... vfuytnwp fyzhntag GRP_24
4199 re: revised prices - local \n\nreceived from: ryculmsd.wofgvkrb@gmail.com... ryculmsd wofgvkrb GRP_26
4209 network access for vahjtusa wenghtyele for fü... folder :; \\eagcldaten\teams\gpc\naccess read ... rhinvtua aquyjfbs GRP_34
4224 vpn access issue hello team,\n\nyvhlenaz ptuqhrwk (on cc) canno... yvhlenaz ptuqhrwk GRP_0
4230 carrier information lost when bulk indicator i... ship method information is lost when bulk ship... gzawrocy shbgwxep GRP_18
4233 gtehdnyushot kennconnect problem \r\n\r\nreceived from: dxnskvbm.xbaswghy@gmail... dxnskvbm xbaswghy GRP_0
4234 milano,italy: duplex mismatch gi2/0/1 on 1811... duplex mismatch: duplex mode on interface giga... mnlazfsr mtqrkhnx GRP_4
4280 ethics issue. \r\n\r\nreceived from: fmzdkyqv.dbrslnhe@gmail... fmzdkyqv dbrslnhe GRP_23
4286 skype issues \r\n\r\nreceived from: wauhocsk.vxuikqaf@gmail... wauhocsk vxuikqaf GRP_0
4301 wichtig: bitte netzwerke synchronisieren !! \n\nreceived from: nhsogrwy.qkxhbnvp@gmail.com... wacxhqvs nxdythgc GRP_12
4304 interface: gigabitethernet1/0/47 · mtb gf wir... interface: gigabitethernet1/0/47 · mtb gf wir... rkupnshb gsmzfojw GRP_4
4312 printer problem / issue information - reroute ... please complete all required questions below. ... yfmaqovp wdonhbez GRP_5
4314 it - speker for skype conferences it - germany:\r\n\r\nskype speakers are requir... atlwdyej vtlhzbix GRP_33
4325 wg: zeichnung 5685567645 \n\nreceived from: dnwfhpyl.zqbldipk@gmail.com... dnwfhpyl zqbldipk GRP_11
4334 system disk c: of server HostName_698 is full ... \r\n\r\nreceived from: fbyusmxz.kxvmcbly@gmail... fbyusmxz kxvmcbly GRP_12
4337 mr tmqfjard qzhgdoua, ordinary no. 100937, req... fyi\n\nvon: axesnghb cyzuomxa \ngesendet: donn... tmqfjard qzhgdoua GRP_59
4342 computer gets hot \r\n\r\nreceived from: ucphibmr.dfvkbtsj@gmail... ucphibmr dfvkbtsj GRP_28
4344 monitor defekt hallo ,\r\n\r\nder monitor von ahmet gök im m... vaigycet jtgmpdcr GRP_24
4351 erp-engineering tool \n\nreceived from: bqapjkcl.ljeakcqf@gmail.com... bqapjkcl ljeakcqf GRP_0
4358 an terminal 12 bei iso-u können im EU_tool ke... an terminal 12 bei iso-u können im EU_tool ke... tiefszyh sfujdlgv GRP_42
4382 printer problem / issue information please complete all required questions below. ... kpogxqvn sfzjbhet GRP_3
4398 erp user blocked, please unlock immediately & ... \n\nreceived from: lixwgnto.krutnylz@gmail.com... lixwgnto krutnylz GRP_0
4436 bearbeitung der aktuellen situation nicht mög... \r\n\r\nreceived from: znqlmjvt.uhyokzlt@gmail... znqlmjvt uhyokzlt GRP_16
4441 business_client not responding \r\n\r\nreceived from: rjanhbde.owfkyjcp@gmail... rjanhbde owfkyjcp GRP_0
4443 printer problem / issue information i am tryin... please complete all required questions below. ... jusenflm sufbehom GRP_0
4452 service kit 8150 für we01 liefern \hcuixqgj m... service kit 8150 für we01 liefern \hcuixqgj m... hcuixqgj mavxgqbs GRP_24
4459 excel issue \r\n\r\nreceived from: anuxbyzg.bvsqcjkw@gmail... anuxbyzg bvsqcjkw GRP_16
4460 freischaltung des ordners "ce ap daten" im lau... bitte für oben genannten ordner schreib- und ... qfetblky iwflmhuc GRP_34
4461 efdl8111218 - outlook issue \r\n\r\nreceived from: wcnfvajb.kxylsamv@gmail... wcnfvajb kxylsamv GRP_0
4471 erpp11 locked \n\nreceived from: zuxcfonv.nyhpkrbe@gmail.com... zuxcfonv nyhpkrbe GRP_0
4473 drucker em93 (bei frau zeilmann) defekt: das p... drucker em93 (bei frau zeilmann) defekt: das p... ltxzfcgm sxvigclz GRP_42
4475 erp logon password \n\nreceived from: ryculmsd.wofgvkrb@gmail.com... ryculmsd wofgvkrb GRP_0
4479 vpn 不能连接,提示无法找打防病毒... vpn 不能连接,提示无法找打防病毒... nhrwdaep orvxhyiw GRP_31
4481 need support \r\n\r\nreceived from: ehfvwltg.eakjbtoi@gmail... ehfvwltg eakjbtoi GRP_29
4482 bw server error message hello team,\n\none of company user, uxndyfrs v... uxndyfrs vahxnfgl GRP_9
4487 vpn 连接后自动断开。 vpn 连接后自动断开,提示调制解调... tmufgokq qtzavows GRP_31
4501 制粉三楼控制室电脑故障 制粉三楼控制室电脑不能开启,电æ... agyvbnwz mxsonkdc GRP_48
4502 铸造车间电脑故障 铸造车间记录生产数据的电脑不能å... ncwfeytj tumlheoi GRP_48
4503 用友定期备份不成功 服务器端用友系统最近半个月日备ä... igdnsjhz awnftgev GRP_48
4535 please activate my new iphone, it is a company... from: nwfodmhc exurcwkm \nsent: tuesday, septe... fmzdkyqv dbrslnhe GRP_0
4560 printer problem / issue information - recurrin... please complete all required questions below. ... rxqtvanc kthqwxvb GRP_12
4570 company-na-usa-usa-switch-tc02-1-2960-access-s... company-na-usa-usa-switch-tc02-1-2960-access-s... oldrctiu bxurpsyi GRP_8
4571 maschinen pc von r241 im bereich kentip lässt... maschinen pc von r241 im bereich kentip lässt... ljxzyriq zqxkrcev GRP_33
4572 erp probleme wenn ich am rechner evh8114148 im erp eine zei... jnktafrs ytxiowbh GRP_0
4576 zurücksetzen auf 12.09.2016 \n\nreceived from: cflrqoew.qbgjwaye@gmail.com... cflrqoew qbgjwaye GRP_24
4578 zeiterfassung funktioniert nicht \we_qs \quali... zeiterfassung funktioniert nicht \we_qs \quali... xwirzvda okhyipgr GRP_24
4579 scanner metroligic für rechner we_wu160 defek... canner metroligic für rechner we_wu160 defekt... jionmpsf wnkpzcmv GRP_24
4580 install company barcode für ewew8323504 \vzqo... install company barcode für ewew8323504 \vzqo... vzqomdgt jwoqbuml GRP_24
4592 user tfazwrdv upwonzvd’s pc name has been ch... \r\n\r\nreceived from: sthyuraj.sektyhar@compa... tfazwrdv upwonzvd GRP_7
4593 ethics \r\n\r\nreceived from: gasbfqvp.fmvqgjih@gmail... gasbfqvp fmvqgjih GRP_23
4599 outloot 老是提示输入密码,更改密ç... outloot 老是提示输入密码,更改密ç... cnxbswhv qmtngeyh GRP_30
4604 gr error \r\n\r\nreceived from: gkwcxzum.answkqpe@gmail... gkwcxzum answkqpe GRP_29
4607 could you resolve item category error during z... \r\n\r\nreceived from: fbvpcytz.nokypgvx@gmail... fbvpcytz nokypgvx GRP_13
4644 mobile device activation from: puxsvfwr cwkjruni \r\nsent: monday, sept... puxsvfwr cwkjruni GRP_0
4649 urgent : with reference to ticket : inc1549876 uyjlodhq ymedkatw (lghuiezj3) has mapped the u... cxqldoub vkgpatsu GRP_39
4669 bei drucker em98 papiertransport defekt beim transport des druckerpapiers treten in re... tczxaubk aupnwqoi GRP_42
4674 missing data in bex productmanagement - urgent... \r\n\r\nreceived from: uwofavej.hxyatnjc@gmail... uwofavej hxyatnjc GRP_9
4684 passwörter wothyehre \r\n\r\nreceived from: dgwrmsja.jzlpwuit@gmail... dgwrmsja jzlpwuit GRP_0
4695 bitte die schreib / leseberechtigung für ord... guten morgen,\r\n\r\nbitte die schreib / leseb... htvepyua izgulrcf GRP_24
4712 ooo - till 4 oct 2016 it problem - engineering... \r\n\r\nreceived from: mikhghytr.sperhake@comp... qvncizuf ueiybanz GRP_0
4714 der rechner von unseren schichtführern h.gabr... hallo \r\n\r\nder rechner von unseren schichtf... jionmpsf wnkpzcmv GRP_24
4715 rechner für messvorrichtung steli funktionier... rechner für messvorrichtung steli funktionier... jionmpsf wnkpzcmv GRP_24
4717 probleme mit öffnen m:\wsb_exe \we_wu102 \pfj... probleme mit öffnen m:\wsb_exe \we_wu102 \pfj... pfjwinbg ljtzbdqg GRP_24
4719 datenlogger opus20 thip q475024 kann keine ver... datenlogger opus20 thip q475024, wird zur übe... qkedpfyj qechgaty GRP_33
4721 requirement of internet access for machine pc ... \n\nreceived from: dfiyvmec.wxioadpt@gmail.com... dfiyvmec wxioadpt GRP_19
4723 转发: 以色列修磨/rma6001502596 \n\nreceived from: wktesmbp.lorjymef@gmail.com... wktesmbp lorjymef GRP_10
4734 reset the password for mobaidfx gviwlsrm on er... passwort zurücksetzen - mobaidfx gviwlsrm GRP_0
4735 handscanner am rückmeldeterminal 6 defekt handscanner am rückmeldeterminal 6 defekt (ge... mhvbnqri sacvgzxf GRP_33
4827 reparo adobe pdf os recibo gerados em pdf estão saindo com car... frjpkuwq rtznexbq GRP_0
4828 design_tool service: file naming convention ha... the geengineering_tooloductdata web service ca... fyuqhlcx fjiuhxae GRP_25
4831 approval workflow problems \r\ni am approfghaching you, as we have proble... jwqyxbzs adpvilqu GRP_13
4853 bahdqrcs xvgzdtqj's onbankrding experience \r\n\r\nreceived from: xzupryaf.vlbikhsm@gmail... xzupryaf vlbikhsm GRP_0
4855 interface: fc3/34 · HostName_1147 on sandplan... interface: fc3/34 · HostName_1147 on sandplan... jloygrwh acvztedi GRP_8
4862 skype - meetinmg button \n\nreceived from: blktuiae.jzakfmhw@gmail.com... blktuiae jzakfmhw GRP_0
4882 install company barcode für ewew8323506 \vzqo... install company barcode für ewew8323506 \vzqo... vzqomdgt jwoqbuml GRP_24
4884 答复: shipments iak \r\n\r\nreceived from: wqzarvhx.hfsojckw@gmail... wqzarvhx hfsojckw GRP_18
4888 arbeitsplatz: pc+telefon in germany im büro k... auf grund das ich ab jetzt einen tag in der wo... wyotidgu nydzrtuw GRP_33
4891 erp transaktion pr05 funktioniert nicht \n\nreceived from: ida.financial@company.com\n... xfznctqa xstndbwa GRP_10
4894 wifi guest account \n\nreceived from: scjxobhd.ldypjkmf@gmail.com... scjxobhd ldypjkmf GRP_0
4897 ordner mbs \r\n\r\nreceived from: pnroqajb.psbyfhkg@gmail... pnroqajb psbyfhkg GRP_0
4899 please help enable the o365 video service die angehängten video die ich per e-mail übe... ymoeqrsx rbctdsyi GRP_16
4908 outlook 打开启动提示错误。 outlook 打开启动提示错误。 mpjoszqg wurpohmf GRP_31
4910 india , company-ap-ind--pu5-lean-2960s-stack-s... india , company-ap-ind-kirty-pu5-lean-2960s-st... mnlazfsr mtqrkhnx GRP_8
4911 转发: finished: start of s&op process \r\n\r\nreceived from: lkrfndev.kztlojin@gmail... lkrfndev kztlojin GRP_0
4916 netzwerkkabel des erp druckers verlängern, netzwerkkabel des erp druckers em 19 verlänge... tfrbwoua aegpkruc GRP_42
4926 erp runtime error.....this is what i'm eventua... \r\n\r\nreceived from: jbfmsxik.mfzjncva@gmail... jbfmsxik mfzjncva GRP_0
4963 lizenz let's talk- video kann nicht geöffnet werden lfaqrivo bmitazrx GRP_0
4965 employee owned mobility agreement \r\n\r\nreceived from: rmzlvqjf.eaqyxljb@gmail... rmzlvqjf eaqyxljb GRP_0
4970 drucker ng15 funktioniert nicht mehr (hardware... please complete all required questions below. ... ogasxnpw cfvqrhap GRP_33
4971 problem in engineering tool \r\n\r\nreceived from: zaeduhlt.jdgsamtv@gmail... zaeduhlt jdgsamtv GRP_25
4974 travel expenses in erp \n\nreceived from: uyrpdvoq.mbzevtcx@gmail.com... uyrpdvoq mbzevtcx GRP_10
4979 rechner für messvorrichtung steli funktionier... rechner für messvorrichtung steli funktionier... jionmpsf wnkpzcmv GRP_24
4981 usb verlängerungskabel liefern .\hxwtidja ixa... usb verlängerungskabel liefern .\hxwtidja ixa... hxwtidja ixahzmvf GRP_24
4983 iehs metrics input - unable to scroll (urgent ... \r\n\r\nreceived from: fniqhjtg.qrfuetpw@gmail... fniqhjtg qrfuetpw GRP_34
4985 kein rechnungseingang beim kunden per e-mail kunde 81125605 / cc5202 erhält unsere rechnun... hgufmidr mfobkyun GRP_13
4987 open tickets - günter steinhäußer \r\n\r\nreceived from: rtnzvplq.erhmuncq@gmail... rtnzvplq erhmuncq GRP_26
4992 probleme mit laser der pc (ganz alter pc) am halbautomaten schalt... sgnubadl gpkovbah GRP_24
4995 fc für departments\skv-alicona hallo marfhtyio.\nmit dem freischalten von srg... ypladjeu wzfryxav GRP_12
4997 fc auf departments\_scan\we47 guten morgen,\r\n\r\nbitte die schreib / leseb... htvepyua izgulrcf GRP_12
5009 we need for all participants an access for the... from: ylqvitsk bfnackrw \r\nsent: wednesday, s... ylqvitsk bfnackrw GRP_0
5010 action required: please connect to the new vpn... from: vivbhuek kanjdye \nsent: thursday, septe... zfliqpxm dgfvaqlh GRP_0
5013 转发: 订单号:5212346451可以签字了 \r\n\r\nreceived from: apacjun.zhang@company.c... qzbxfncr kysuqema GRP_29
5039 unknown emails from miltgntyuon knighdjhtyt \r\n\r\nreceived from: zxobmreq.udikorhv@gmail... zxobmreq udikorhv GRP_0
5068 freigabe auf ordner für ytcxjzue guplftok (kr... unter departments (\\HostName_579) (m:) unter ... lzpuyrvw zkxbacvn GRP_34
5077 reset passwords for césar abreu rghkiriuytes ... the btvmxdfc yfahetsc GRP_17
5091 printer problem / issue information please complete all required questions below. ... zgdvhfop kbrmfcog GRP_0
5101 data retrieval from backup server \n\nreceived from: vkzwibco.pueyvhoi@gmail.com... vkzwibco pueyvhoi GRP_12
5105 it help \r\n\r\nreceived from: scjxobhd.ldypjkmf@gmail... scjxobhd ldypjkmf GRP_28
5110 policy ? \r\n\r\nreceived from: hbmwlprq.ilfvyodx@gmail... hbmwlprq ilfvyodx GRP_39
5111 babiluntr \r\n\r\nreceived from: dhtxwcng.hruckmey@gmail... dhtxwcng hruckmey GRP_28
5123 collaboration_platform 里面打开"查看我ç... collaboration_platform 里面打开"查看我ç... lkrfndev kztlojin GRP_31
5130 companysecure user can't get into the network \n\nreceived from: ctzykflo.evzbhgru@gmail.com... ctzykflo evzbhgru GRP_0
5140 error while extending mm# from: kbyivdfz zwutmehy [mailto:kbyivdfz.zwutm... kbyivdfz zwutmehy GRP_29
5146 walkme加载故障 walkme下载安装后,按钮不能在浏览å... whflryeb fatgdzhq GRP_48
5147 电脑开机故障 原材料仓库电脑开机蓝屏,主机面æ... ycjxrsba ehftdorm GRP_48
5149 开不了机 开不了机,显示系统坏了。 kclhqspo xvugztyc GRP_30
5189 drucker ag71 in fürth \n\nreceived from: osjqfbvw.hlmgrfpx@gmail.com... osjqfbvw hlmgrfpx GRP_28
5204 employment status - three new non-employee [en... *page down to ensure that all required data fi... lbqgystk uezmfhsn GRP_2
5223 fc on departments\distribution moin marfhtyio,\r\n\r\nkannst du bitte herr lz... vzqomdgt jwoqbuml GRP_12
5231 dock station - required \n\nreceived from: jmoqelbc.fbzsyjne@gmail.com... jmoqelbc fbzsyjne GRP_19
5235 bobj - erp business objects \n\nreceived from: ctzykflo.evzbhgru@gmail.com... ctzykflo evzbhgru GRP_9
5240 lese- / schreibberechtigung für m:\kbt-auftrg... hallo marfhtyio,\r\n\r\nbitte richte mir die l... htvepyua izgulrcf GRP_12
5249 reset microsoft online services password from: microsoft on behalf of company inc. [mai... rmezbnqt ntbmkpuh GRP_0
5254 "\\HostName_625\departments\ehs_arbeitsmedizin... bitte den verantwortlichen des ordners "\\Host... ozphysqw pgcmwqze GRP_34
5257 删除了一个excel 文档,文档地址:\... 删除了一个excel 文档,"kmfg0042f01r00-... rjxbiplo jmhnleaq GRP_0
5266 reset microsoft online services password for j... from: microsoft on behalf of company inc. [mai... jhyazros azdxonjg GRP_0
5281 "scghhnelligkeit" meines internets plnvcwuq ik... \n\nreceived from: jclrangd.kjlnearz@gmail.com... jclrangd kjlnearz GRP_0
5285 restore o:\schaem\eigene dateien\db1.mdb hallo \n\nwürdest du bitte diese datei "o:\sc... cflrqoew qbgjwaye GRP_12
5286 restore directory schulzgth from public hallo marfhtyio,\r\n\r\n\r\nder ordner schulzg... vzqomdgt jwoqbuml GRP_12
5291 am keyence messgerät monitor ausgetauscht am keyence messgerät monitor ausgetauscht ugvkyalo ayofwjpi GRP_42
5294 tür- öffnerfunktion geht nicht. keine tür- öffnerfunktion bei folgenden zei... tgpvrbyi ztdxwpcn GRP_0
5303 报税电脑不能联网,让贺正平休。 报税电脑不能联网,让贺正平休。 hlrmufzx qcdzierm GRP_30
5310 ie浏览器无法打开。 ie浏览器无法打开。 dkxstwfq eljczgbn GRP_31
5311 系统故障,启动蓝屏. 系统故障,启动蓝屏. lhkqbmna ekhtivsd GRP_31
5321 power outage: usa tn, tm-usa-att-rtr circuit w... usa tn, tm-usa-att-rtr circuit went down at 0... mnlazfsr mtqrkhnx GRP_8
5371 unable to create erp notification \n\nreceived from: dfiyvmec.wxioadpt@gmail.com... dfiyvmec wxioadpt GRP_45
5396 interface fa0/21 on company-sa-bra-sao-pollaur... interface: fastethernet0/21 · 1721 router emb... dkmcfreg anwmfvlg GRP_8
5418 reset password \n\nreceived from: koahsriq.wdugqatr@gmail.com... koahsriq wdugqatr GRP_0
5429 urgent: samag lieferschein sales order #356125... \n\nreceived from: vujymcls.sgpmyviq@gmail.com... vujymcls sgpmyviq GRP_18
5440 password reset alert from o365 for user • m... password reset alert from o365 for user • m... lhrnyktm egilutwz GRP_0
5448 enterprise search connector for… is not work... \r\n\r\nreceived from: ucphibmr.dfvkbtsj@gmail... ucphibmr dfvkbtsj GRP_11
5449 company distributor_tool \r\n\r\nreceived from: pnabslgh.vatpgsxn@gmail... pnabslgh vatpgsxn GRP_0
5450 an prüfen \ we_wu113 \ essa presse \ box blin... an prüfen \ we_wu113 \ essa presse \ box blin... wrcktgbd wzrgyunp GRP_24
5451 setup rechnerf für infostand instandsetzung \... setup rechnerf für infostand instandsetzung \... niptbwdq csenjruz GRP_24
5453 externe festplatte zu verfügung stellen \alte... externe festplatte zu verfügung stellen \alte... edspmloy fxnkzaqu GRP_24
5456 interface: fastethernet0/27 vlan 51: lhqwx402... interface: fastethernet0/27 · vlan 51: lhqwx4... spxqmiry zpwgoqju GRP_4
5464 答复: 35969737/2032252 \r\n\r\nreceived from: wqzarvhx.hfsojckw@gmail... wqzarvhx hfsojckw GRP_13
5468 na remote is not working [‎9/‎2/‎2016 12:06 pm] mhfjudahdyue rfgr... jvhqyamt wodzrcjg GRP_2
5478 material issue now, the quantity changed to 3pcs, seems not s... wktesmbp lorjymef GRP_29
5484 configure e6430 unit for standard login for pr... \r\n\r\nreceived from: kbclinop.vsczklfp@gmail... kbclinop vsczklfp GRP_19
5485 printer problem / issue information please complete all required questions below. ... mfixrouy dyifhcjt GRP_0
5489 ie 浏览器主页被更改为酒店网站地å... ie 浏览器主页被更改为酒店网站地å... kcldufqe xghvrzoi GRP_31
5491 電腦出現藍屏,無法開機 連vpn時,無法連上後,重試後,突然出ç... zhpwcdea cboefuis GRP_31
5525 die synchronisierung mit exchange activesync die synchronisierung mit exchange activesync i... gferjcsh apqwniyr GRP_0
5547 plm \n\nreceived from: grkaqnzu.mldekqpi@gmail.com... grkaqnzu mldekqpi GRP_0
5580 outlook 不能启动。 outlook 不能启动。 ebkmczgy pbzfgcoa GRP_31
5581 engineering_tool and engineering_tool not working \n\nreceived from: cugjzqlf.djwbyact@gmail.com... cugjzqlf djwbyact GRP_0
5593 list of it team functions with names \n\nreceived from: kflqpite.gbeoqsnc@gmail.com... kflqpite gbeoqsnc GRP_0
5613 interface: fastethernet0/27 vlan 51: lhqwx4021... interface: fastethernet0/27 · vlan 51: lhqwx4... jyoqwxhz clhxsoqy GRP_8
5623 activity 4 and 5 efficiency too high reported ... note from omufjcxr ahypftjx - \r\n\r\n"do you ... entuakhp xrnhtdmk GRP_9
5634 issues to log into ethics course \r\n\r\nreceived from: fmzdkyqv.dbrslnhe@gmail... fmzdkyqv dbrslnhe GRP_23
5669 share collaboration_platform site \n[‎8/‎31/‎2016 1:03 pm] \n\n\n\nhi \n\n... qcehailo wqynckxg GRP_16
5677 change date settings-citrix-multiple users \n\nreceived from: iltcxkvw.dkwmxcgn@gmail.com... sgaczfvo wxmkrzfu GRP_12
5687 lentidão da máquina favor verificar a lentidão da minha máquina,... frjpkuwq rtznexbq GRP_62
5688 basis on-call / shift details... \r\n\r\nreceived from: mnxbeuso.rfmdlwuo@gmail... mnxbeuso rfmdlwuo GRP_0
5691 password locked \n\nreceived from: ebkfwhgt.flapokym@gmail.com... ebkfwhgt flapokym GRP_0
5696 xjmpacye qgxrptnf cannot send delivery notes v... dear it team, \r\nmy colleague cannot send her... xjmpacye qgxrptnf GRP_13
5697 printer problem / issue information please complete all required questions below. ... gljrdmnu yfnbkcmp GRP_0
5699 need access to my drive (please check her acco... \r\n\r\nreceived from: ybhazlqp.zfghsxiw@gmail... ybhazlqp zfghsxiw GRP_2
5700 additional corrections of sales org 1278 and ... 1. sales organisation address 1278 – phone n... mynfoicj riuvxdas GRP_13
5725 missrouting of printouts: check settings if p... \r\n\r\nreceived from: tkuivxrn.urdgitsv@gmail... qgopxabz xnuieqjr GRP_13
5729 ooo : !3.09.2016issues with mailbox " company" \n\nreceived from: fbyusmxz.kxvmcbly@gmail.com... fbyusmxz kxvmcbly GRP_0
5731 电脑运行速度超级慢,打开一个erpæ... \n\nreceived from: tuqrvowp.fxmzkvqo@gmail.com... tuqrvowp fxmzkvqo GRP_30
5742 engineering_tool上传不了 报警显示没有建立网络连接,但是v... molihtdq auprogsj GRP_31
5754 skype 不能登录,提示证书错误。 skype 不能登录,提示证书错误。 qekyowtv qdjixvkh GRP_31
5761 主机不能开启 主机不能开启,电源灯正常,主机é... cpdilmjx jwsqpiac GRP_48
5762 打开office 2013显示是未经授权产品 打开outlook、ppt显示是未经授权产品... hbvwqine eakqyovu GRP_48
5768 folder access for jywvemun qngschtz from: nwfodmhc exurcwkm \r\nsent: wednesday, a... naruedlk mpvhakdq GRP_34
5787 windows asks to install driver and then won't ... please complete all required questions below. ... rxqtvanc kthqwxvb GRP_0
5827 答复: ticket_no1538811::company center autho... \n\nreceived from: udzkgbwl.bgpedtqc@gmail.com... udzkgbwl bgpedtqc GRP_21
5828 problemas de configuração no quick quote as imagens não alteramdnty quando solicitadas... zpfitlyu cemvwyso GRP_62
5835 答复: ticket_no1538811::company center autho... \r\n\r\nreceived from: udzkgbwl.bgpedtqc@gmail... udzkgbwl bgpedtqc GRP_0
5841 s&op \n\nreceived from: uyrpdvoq.mbzevtcx@gmail.com... uyrpdvoq mbzevtcx GRP_0
5866 telephony_software 2016 r2 - please can you re... \r\n\r\nreceived from: hupnceij.hyozjakb@gmail... hupnceij hyozjakb GRP_7
5887 tablet - dell 7350 - 电脑播放音频文件æ... please provide details of the issue.\r\n电脑... riuhxcab jcsavihq GRP_31
5890 iphone上的skype不能登录不能参加会è®... iphone上的skype不能登录不能参加会è®... hprdlbxf nozjtgwi GRP_30
5891 vpn不能使用,请转给小贺 vpn不能使用,请转给小贺 ehfvwltg eakjbtoi GRP_0
5893 druckerzuordnung zum disponenten \r\n\r\nreceived from: icnjlzas.cvphuknj@gmail... icnjlzas cvphuknj GRP_5
5897 habe gestern mein passwort geändert. nun verb... kann anmeldetaten eingeben, aber fenster kommt... axeclkro snfmerdb GRP_0
5910 laptop 从桌子上面摔下,外壳摔裂,... laptop 从桌子上面摔下,外壳摔裂,... bwjhurqx akyuowev GRP_31
5913 邮箱登录不了 昨天提示更改密码,改好后可以登å... vSMfFXAJ OUAhWpDS GRP_30
5954 bitte um rückruf morgen um 7 uhr 30 mitteleur... \n\nreceived from: ptuchwad.yzvrlcqa@gmail.com... ptuchwad yzvrlcqa GRP_0
5982 accounts erstellen bitte von: gogtyekhan merdivan \ngesendet: montag, 2... qidgvtwa qvbutayx GRP_0
5996 laufzeitfehler bei hrp (hcm production) folgender fehler ist bei der erstellung der ze... rayklfcm bjfwckyx GRP_0
6017 open order schedule lines_p2016-08-28-22-03-54 hallo ruchitgrr, hallo frau haug,\n\nleider en... anivdcor rbmfhiox GRP_9
6018 outlook inbox updating \n\nreceived from: xfdkwusj.gyklresa@gmail.com... xfdkwusj gyklresa GRP_0
6023 check router wifi (router: sao-pollaurido-merc... hi team, please check the router wifi sao-poll... qasdhyzm yuglsrwx GRP_4
6027 wendt wac 745 quattro's 5 & 6 an pc bzw. serve... bitte die letzten 2 umfangsschleifmaschinen in... ajiqfrkz dolmpkqf GRP_42
6030 it help \r\n\r\nreceived from: scjxobhd.ldypjkmf@gmail... scjxobhd ldypjkmf GRP_28
6038 telefon-display \r\n\r\nreceived from: pnroqajb.psbyfhkg@gmail... pnroqajb psbyfhkg GRP_33
6048 anmeldeaccount "mp_ek" am pc "empw8111328a:--... am oben genannten pc muss der anmeldename "mp_... lzcvymbn lqnaykvo GRP_0
6051 netzwerk für scan nicht verfügbar, bitte prÃ... netzwerk für scan nicht verfügbar, bitte prÃ... vzqomdgt jwoqbuml GRP_24
6052 zeitwirtschaft germany seit 25.08.2016, 6.00 uhr morgens, sind für w... yjofqlrx aqvxfhmn GRP_25
6056 we 108 scannt nicht mehr hallo ,\r\n\r\nunser drucker we 108 scannt nic... whykbjdq gfqlnysm GRP_24
6065 problem beim skannen von unterlagen \n\nreceived from: jctnelqs.lansuiwe@gmail.com... jctnelqs lansuiwe GRP_0
6067 tr: rappel: vous avez un nouveau message ! ***... \r\n\r\nreceived from: hupnceij.hyozjakb@gmail... hupnceij hyozjakb GRP_0
6069 zlgmctws khfjzyto don´t have access to her co... \r\n\r\nreceived from: qbewrpfu.lwibmxzo@gmail... qbewrpfu lwibmxzo GRP_0
6078 beim scannen von aufträgen kommt die meldung ... beim scannen von aufträgen kommt die meldung ... kiqrvwat gwkpxzyt GRP_33
6086 request to reset microsoft online services pas... from: microsoft on behalf of company inc. [mai... tgynoqcs uxfyzrma GRP_0
6097 apac china pbx/telephony_software system issue apac china& dc have ip phone issue, one e1 lin... bqdyhnoj rwyvsfgn GRP_37
6098 电脑密码同步问题 hi gartryhu,\r\n\r\n我现在电脑有几个é—... pqaoenuv dqythubf GRP_31
6100 skype could not work skype could not work ,can not hear anything qpysibeo zpekrnbu GRP_0
6102 wgq dc所有电话无法拨通外线 wgq dc所有电话无法拨通外线。 wktesmbp lorjymef GRP_31
6106 电脑不能开机 早上上班电脑打不开。 mzerdtop xnlytczj GRP_30
6164 problems weekly report \r\n\r\nreceived from: jokgacwd.hdfcwust@gmail... jokgacwd hdfcwust GRP_0
6165 collaboration_platform assistance \r\n\r\nreceived from: wjpncyef.tspnaimc@gmail... wjpncyef tspnaimc GRP_16
6180 weekly report error message \r\n\r\nreceived from: mrczxwje.ocasryzq@gmail... mrczxwje ocasryzq GRP_0
6203 support für umbau \ ewew8323862 \xwirzvda okh... support für umbau \ ewew8323862 \xwirzvda okh... xwirzvda okhyipgr GRP_24
6208 it help for engineering_tool and engineering_... dear sir,\r\n\r\nplease help to download softw... vxhyftae tbkyfdli GRP_0
6213 wrong nxd in plm \r\n\r\nreceived from: vahqkojb.trlapeso@gmail... vahqkojb trlapeso GRP_11
6215 support für umbau \ewew8323735 \lpfzasmv cleo... support für umbau \ewew8323735 \lpfzasmv cleo... lpfzasmv cleoprzq GRP_24
6218 support für stöhrmann \xosdfhbu gtbfkisl support für stöhrmann \xosdfhbu gtbfkisl xosdfhbu gtbfkisl GRP_24
6220 problem with erp logon \r\n\r\nreceived from: zaeduhlt.jdgsamtv@gmail... zaeduhlt jdgsamtv GRP_0
6226 passwort entsperrung erp SID_34 \r\n\r\nreceived from: plfwoagd.chtpiazu@gmail... plfwoagd chtpiazu GRP_0
6231 aufstellung ordnerzugriff \n\nreceived from: wtxvqngf.nxjivlmr@gmail.com... wtxvqngf nxjivlmr GRP_34
6236 material issue \n\nreceived from: wktesmbp.lorjymef@gmail.com... wktesmbp lorjymef GRP_29
6237 tablet - dell 7350 - 电脑开机启动蓝屏 please provide details of the issue.\r\n电脑... rbkvofgu jthclzow GRP_31
6238 can not post dn# 9169142692 due to batch issue \n\nreceived from: wktesmbp.lorjymef@gmail.com... wktesmbp lorjymef GRP_18
6257 could not setup notification list in dvw syste... user name: yinnrty\nuser id: 11333657100\nsyst... cysbkonu mzutvwir GRP_25
6261 ksem / m36 / ewkw8113117 / rechner startet nicht bios einstellen, das rechner bei netz-reset se... zxopwyak zrbfkimx GRP_33
6275 check router wifi - site: south_amerirtca - sp... hi team, please check the router wifi the site... qasdhyzm yuglsrwx GRP_4
6278 pc probleme urgent \n\nreceived from: eluvxqhw.gpbfkqeu@gmail.com... eluvxqhw gpbfkqeu GRP_0
6298 wireless guest access cti ( network –wireless-guest )\n\nguest fir... vbmzgsdk jdmyazti GRP_0
6335 folder access (s:\globalace_holemaking\ha646) please se my comments in red.\r\n\r\nviele grÃ... dxgyefir rczsatqu GRP_34
6336 blocked web pages \r\n\r\nreceived from: iygsxftl.hysrbgad@gmail... iygsxftl hysrbgad GRP_0
6344 probleme mit weekly report und engineering_too... \n\nreceived from: rkyjnbqh.kfshormi@gmail.com... rkyjnbqh kfshormi GRP_0
6347 skype anmeldung ich habe gestern über den passwortmanager mei... mhvbnqri sacvgzxf GRP_0
6355 support für umbau \ we22 \ port 22 \niptbwdq ... support für umbau \ we22 \ port 22 \niptbwdq ... niptbwdq csenjruz GRP_24
6360 support für umzug \qwynjdbk eamnvwyh support für umzug \qwynjdbk eamnvwyh qwynjdbk eamnvwyh GRP_24
6364 mp_fb konto gesperrt keine anmeldung möglich lqjoagzt gqueiatx GRP_0
6365 keine netzwerkverbindung eemw8144234 keine netzwerkverbindung können h... jbifdshu jzercpoq GRP_0
6368 aw: please take this survey related to ticket_... \n\nreceived from: tgpvrbyi.ztdxwpcn@gmail.com... tgpvrbyi ztdxwpcn GRP_0
6370 kein zugriff auf server möglich! meldung: fehler bei der erneuten verbindungshe... tqrylspg ijzghqwy GRP_12
6373 aw: please take this survey related to ticket_... \n\nreceived from: efjzbtcm.mdpviqbf@gmail.com... efjzbtcm mdpviqbf GRP_0
6398 skype login issue \n\nreceived from: crkdjbot.qiztrxne@gmail.com... crkdjbot qiztrxne GRP_0
6405 instalação guardião (banco hsbc) gentileza, instalar o guardião do banco hsbc ... btxfwisc mapbkflt GRP_62
6431 computer lrrw8514654 - us_plant location252 \n\nreceived from: dpuifqeo.eglwsfkn@gmail.com... dpuifqeo eglwsfkn GRP_0
6463 tengigabitethernet1/5 · connection to tech-45... tengigabitethernet1/5 · connection to tech-45... dkmcfreg anwmfvlg GRP_4
6468 reparo pdf creator favor reparar o pdf creator, quando vou imprim... frjpkuwq rtznexbq GRP_62
6469 top urgent request les team: change back kis i... subject: wrong address on kis inwarehouse_tool... qgopxabz xnuieqjr GRP_13
6488 zugriff auf netzlaufwerke \r\n\r\nreceived from: sthqwdpj.lpnigfyq@gmail... sthqwdpj lpnigfyq GRP_0
6489 login error \r\n\r\nreceived from: nbdljruw.axcrspyh@gmail... nbdljruw axcrspyh GRP_0
6492 distributor_tool überblick / schulung hallo\r\ndanke für ihre antwort.\r\nich habe ... elivbznp tynvpcfk GRP_0
6494 material in q for complaints shipped to dfrt ... we are having an issue with materials from com... vwpxjtof vmidzswj GRP_18
6497 probleme mit vpn stöhrmann \niptbwdq csenjruz probleme mit vpn stöhrmann \niptbwdq csenjruz niptbwdq csenjruz GRP_24
6502 your company guest account credentials wie kann ich die gäste freischalten ?\r\n\r\n... gqhyzpxm ztpomxbe GRP_0
6517 need your support \n\nreceived from: ehfvwltg.eakjbtoi@gmail.com... ehfvwltg eakjbtoi GRP_29
6531 would you please help change the email of ship... from: north service \nsent: wednesday, august ... hnpbcfsz qmvbycax GRP_18
6534 涂层、管丝车间电话故障 涂层、管丝车间电话故障,40634943ã€... vrmpysoz qkiucpdx GRP_48
6535 网络不通 网络不通,右下角网络图标显示未è... neovalui kabpfvic GRP_48
6601 reset the password for gülperi aköz on erp /... on erp SID_34, SID_37 i want to reset my pass... plbwhxqt tqzdygxw GRP_0
6614 collaboration_platform cloud ordner gelöscht \n\nreceived from: uwncfovt.vxjbunfi@gmail.com... uwncfovt vxjbunfi GRP_0
6624 erp hcm: zpd_upload_time \n\nreceived from: jywvemun.qngschtz@gmail.com... jywvemun qngschtz GRP_20
6627 fehler wie gehabt - inplant_852536 datenbanken lassen sich noch nicht öffnen\r\n... djilqgmw bidchqsg GRP_12
6631 def. montitor wechseln bitte den def. monitor von der lüftungssteuer... ndkrcxjb hpormqtx GRP_42
6643 lüfter defekt \rechner für videoüberwachung lüfter defekt \rechner für videoüberwachung niptbwdq csenjruz GRP_24
6647 ich benötige netzwerkkabel für den teleservi... hallo herr bghakch,\r\nich benötige netzwerkk... qidgvtwa qvbutayx GRP_24
6648 probleme mit lan ín beschprechungsraum \puxsv... probleme mit lan ín beschprechungsraum \puxsv... puxsvfwr cwkjruni GRP_24
6670 ksem / wzs60 / m1 (m07.15360) pc defekt, ist selbstständig runter gefahren ... zxopwyak zrbfkimx GRP_33
6676 usa has 2 unresolved mii password lockouts-emp... hi all, \n \nwe had 3-4 occurences where peopl... epivntxc fdrxmuga GRP_0
6710 download drawing response is error even though... when i send the input to download a drawing, i... tskwevno sjhpoakl GRP_20
6718 probleme mit vpn client \r\n\r\nreceived from: fmhlugqk.dpraethi@gmail... fmhlugqk dpraethi GRP_0
6770 bitte einen arbeitszeitplan erstellen für die... bitte einen arbeitszeitplan erstellen für die... ltxzfcgm sxvigclz GRP_52
6776 atualização programdntya ted preciso entregar as declarações e o programd... frjpkuwq rtznexbq GRP_62
6781 set back document t: +49\nplease set back this document to statu... pjrhqkne ewruqyds GRP_12
6785 help with excel that updates from crm \n\nreceived from: eqwaiphc.qxwfeuth@gmail.com... eqwaiphc qxwfeuth GRP_0
6789 edi bestellungen der ksb ag sehr geehrte damen und herren,\r\n\r\nkönnen ... pxsghrjd wiehqmka GRP_32
6802 system disk of server HostName_698 is full \r\n\r\nreceived from: fbyusmxz.kxvmcbly@gmail... fbyusmxz kxvmcbly GRP_12
6803 persönliches laufwerk "adelhmk" nicht mehr ve... bitte scghhnellstmöglich persönliches laufwe... wtxvqngf nxjivlmr GRP_12
6807 drucker findet scan-adresse nicht \r\n\r\nreceived from: fdqjsygx.aivdjqtr@gmail... fdqjsygx aivdjqtr GRP_12
6812 öffnen von exel_anhängen ( 0043 664 4688892) \r\n\r\nreceived from: soldfnbq.uhnbsvqd@gmail... soldfnbq uhnbsvqd GRP_0
6821 setup new laptop für roboworker \qidgvtwa qvb... setup new laptop für roboworker \qidgvtwa qvb... qidgvtwa qvbutayx GRP_24
6825 mb5b download \r\n\r\nreceived from: azovgeck.zuwnxdbt@gmail... azovgeck zuwnxdbt GRP_29
6828 bildschirm im rückmeldeterminal 6 (gegenüber... bildschirm im rückmeldeterminal 6 (gegenüber... frzjtmyk wzacvhki GRP_33
6829 customer master \r\n\r\nreceived from: rgtarthi.erjgypa@compan... xqoljzbh aydcwkxt GRP_0
6832 tablet - dell 7350 - windows系统启动后黑... please provide details of the issue.\r\nwindow... hymjicru ckrxqfes GRP_31
6836 wegen file-server austausch kein zugriff auf d... wegen file-server austausch kein zugriff auf d... tiefszyh sfujdlgv GRP_12
6854 tablet - dell 7350 - windows 8.1 系统被还å... please provide details of the issue.\r\n windo... ybjgecfx nxzuseac GRP_31
6871 no connection to t drive in na \n\nreceived from: blktuiae.jzakfmhw@gmail.com... blktuiae jzakfmhw GRP_0
6896 interface: fastethernet0/27 · vlan 51: lhqwx4... interface: fastethernet0/27 · vlan 51: lhqwx4... jloygrwh acvztedi GRP_4
6906 china(apac): interface: gigabitethernet0/33 ·... china(apac): interface: gigabitethernet0/33 ·... vbwszcqn nlbqsuyv GRP_4
6912 spam \n\nreceived from: qfcxbpht.oiykfzlr@gmail.com... qfcxbpht oiykfzlr GRP_27
6916 re: ticket_no1530897 -- comments added \n\nreceived from: afkstcev.utbnkyop@gmail.com... afkstcev utbnkyop GRP_0
6925 company email accounts tzrekwqf homwadbs! dear support team,\r\n\r\ncan you give me an s... tzrekwqf homwadbs GRP_0
6977 emails not routing from outlook into telephony... i am entering this ticket to confirm that emai... vpnxjtyz wmakhpci GRP_7
6978 support für osterwalder \niptbwdq csenjruz support für osterwalder \niptbwdq csenjruz niptbwdq csenjruz GRP_24
7003 tel. 416 prüfen tel. 416 prüfen swfdvezh fpjybetd GRP_33
7023 转发: 申请退换货,质量投诉, exte... \n\nreceived from: pkdavqwt.tafrmxsh@gmail.com... pkdavqwt tafrmxsh GRP_15
7032 solicito imagens da frente do cd visando avali... período: das 12h00 de 15/08 até 12h00 de 18/... dughqrnf mebofqhz GRP_62
7038 request to reset microsoft online services pas... from: microsoft on behalf of company inc. [mai... recsynqt byoezmla GRP_0
7071 can´t do pgi for mm 5316075 +49 \nSID_34 \nerror: item category is not de... qpiojxcl dxkcljew GRP_18
7088 very urgent: reset windows password \n\nreceived from: oslzvpgk.nhwsxgpb@gmail.com... oslzvpgk nhwsxgpb GRP_0
7095 mitarbeiter reichenberg philipp benötigt bere... mitarbeiter reichenberg philipp benötigt bere... egklxsoy hivwzjcf GRP_34
7098 drucker an messmaschine r173 in halle b hat st... drucker an messmaschine r173 in halle b hat st... frzjtmyk wzacvhki GRP_33
7102 win installation image for russia office \n\nreceived from: koahsriq.wdugqatr@gmail.com... koahsriq wdugqatr GRP_65
7110 browserproblem mit hub \n\nreceived from: mnakehrf.mvunqihf@gmail.com... mnakehrf mvunqihf GRP_0
7112 telephony_software-software upgrade funktionie... telephony_software-software upgrade funktionie... bqyfwclo osjklifb GRP_33
7118 wireless access point prüfen geb.14 ,geb16 un... wireless access point prüfen geb.14 ,geb16 un... xmlbfjpg yegzbvru GRP_24
7124 goods movement mb11 hello,\n\n\nplease let me know how to conduct ... iwazgesl ydgqtpbo GRP_29
7126 电脑卡且连不上内网 \n\nreceived from: tuqrvowp.fxmzkvqo@gmail.com... tuqrvowp fxmzkvqo GRP_30
7130 interface down ::usa switch :: bottom-6509-msf... interface down ::bottom-6509-msfc720 switch ... uxgrdjfc kqxdjeov GRP_4
7135 telephony_software 软件升级后, 打印文ä... telephony_software 软件升级后, 打印文ä... tdsmnuib entgzrhy GRP_31
7159 it assistance. need access usaed to collaborat... \r\n\r\nreceived from: mgcivbtx.bshmfxya@gmail... mgcivbtx bshmfxya GRP_16
7174 laptop damaged as it fell in flight from: prarthyr jha \nsent: wednesday, august 1... jtqaplhs yjmpiqcu GRP_19
7204 ı cant connect netviewer ı connected at vpn after ı saw main screen o... ouqwcehj amtqxvpg GRP_0
7232 erp logon does not open \r\n\r\nreceived from: qjtbrvfy.avwqmhsp@gmail... qjtbrvfy avwqmhsp GRP_0
7236 server probleme \r\n\r\nreceived from: jxlekivs.fwakmztv@gmail... jxlekivs fwakmztv GRP_0
7240 connecting with erp not possible \r\n\r\nreceived from: yfmaqovp.wdonhbez@gmail... yfmaqovp wdonhbez GRP_0
7243 erp is broken down \r\n\r\nreceived from: cbupnjzo.daflthkw@gmail... cbupnjzo daflthkw GRP_0
7248 probleme mit scanner und drucker \zslugaxq dtw... \r\nbei frau gödde ist der drucker nicht i.o.... zslugaxq dtwlrofu GRP_24
7254 probleme mit anmelden. \wxstfouy isjzcotm leider kann ich mich nicht an meinem rechner a... wxstfouy isjzcotm GRP_24
7257 engineering_drawing_tool rahmen kann nicht gel... engineering_drawing_tool lädt keine zeichnung... dknzygjt bscvykei GRP_46
7262 reset the password for szcbhvwe edpouqjl on er... dear it team, \r\n\r\ncan you please be so kin... szcbhvwe edpouqjl GRP_0
7264 meeting inivation no skype 加载项 meeting inivation no skype 加载项 。 ynmcplev qpgctajz GRP_31
7278 keine azm-meldungen möglich im EU_tool lassen sich keine azm-meldungen ein... arkmtcig adpsrxjc GRP_25
7294 problem mit festnetztelefon \n\nreceived from: jctnelqs.lansuiwe@gmail.com... jctnelqs lansuiwe GRP_33
7302 skype会议时不去 skype会议从邮箱里的链接进不去。 rekpvblc ufysatml GRP_30
7307 实习生登录office365下载邮件附件时æ... 实习生登录office365下载邮件附件时æ... kyagjxdh dmtjpbnz GRP_30
7309 餐厅电视无法播放视频文件 餐厅电视机无法播放视频文件,请å... aeozwlch lkiocfbn GRP_30
7314 制粉电脑电源线等损坏 制粉三楼控制室内电脑电源线、网ç... agyvbnwz mxsonkdc GRP_48
7315 skype for business故障 系统中突然找不到skype for business了 uheflzgy cpaosbfz GRP_48
7316 拼写和语法错误 编辑ppt时,总是弹出"无法检查拼写... jtplaoui uvsihfpn GRP_48
7317 电脑故障 质控部拉力试验机控制电脑的操作ç... kwpzbxvf cvuhoizx GRP_48
7375 wvdxnkhf jirecvta has issues to connect compan... wvdxnkhf jirecvta has issues to connect compan... ugephfta hrbqkvij GRP_0
7376 erp access issue system (SID_34, SID_37, SID_39, SID_38, hrp, o... kmtpzyre mqlsfkre GRP_2
7382 connecting drives to my computer \r\n\r\nreceived from: zxobmreq.udikorhv@gmail... zxobmreq udikorhv GRP_0
7404 probleme mit öffnen von dokumenten im intrane... probleme mit öffnen von dokumenten im intrane... xmlbfjpg yegzbvru GRP_24
7415 bitte um ein ruckruf \n\nreceived from: zkgfcyvx.sgxeatyb@gmail.com... zkgfcyvx sgxeatyb GRP_0
7418 berechtigung zeitwirtschaft av hallo herr busse,\r\n\r\nwürden sie dies bitt... pmweoxyq zrkjnydi GRP_0
7422 telephony_software_2016r2_installation - upgra... \r\n\r\nreceived from: zjcsqtdn.jikyworg@gmail... zjcsqtdn jikyworg GRP_65
7428 new order--mm# 2694571 dmhpm045 / mm# 2695539 ... from: crysyhtal xithya \r\nsent: tuesday, augu... smktofel etsoirbw GRP_44
7431 email-anzeige \r\n\r\nreceived from: trgqbeax.hfyzudql@gmail... trgqbeax hfyzudql GRP_0
7433 zpdist_programdnty not allowing to distribute ... \r\nhello chandruhdty, ebi,\r\n\r\ni´ve creat... cfajzero vlygoksi GRP_18
7436 nicht gebuchte anzahlungsrechnungen company 54... siehe beigefügte e-mail hgufmidr mfobkyun GRP_10
7439 答复: help for mm#4866474 24800776 \r\n\r\nreceived from: windy.shi@company.com\r... tycludks cjofwigv GRP_6
7441 loaner laptops germany location germany requ... \r\n\r\nreceived from: ubiqcrvy.mxjcnqfs@gmail... ubiqcrvy mxjcnqfs GRP_28
7444 support für umzug \qvncizuf ueiybanz support für umzug \qvncizuf ueiybanz qvncizuf ueiybanz GRP_24
7449 request to reset microsoft online services pas... from: microsoft on behalf of company inc. [mai... xqkydoat bveiyclr GRP_0
7450 problem mit start in outlook \r\n\r\nreceived from: ptuchwad.yzvrlcqa@gmail... ptuchwad yzvrlcqa GRP_0
7470 sipppr for help dear it,\n\n以下是sipppr的入口链接ã€... pfiyvdea uwbdsfmr GRP_31
7474 r173 schneeberger cnc nr.20123 / wza programdntyme lassen sich teilweise nicht öff... zxopwyak zrbfkimx GRP_33
7499 password reset from: jghjimdghty bfhjtuiwell \nsent: monday, ... mdbegvct dbvichlg GRP_0
7512 it-germany-h. kruse / as-400-passwortänderung... hallo frthdyui,\r\npasswortänderung in der as... jdynzuim uapkdvgr GRP_49
7536 immediate restoration of t drive files required \n\nreceived from: crkdjbot.qiztrxne@gmail.com... crkdjbot qiztrxne GRP_0
7543 outlook (nicht lizensiertes produkt) \r\n\r\nreceived from: vnsmwqhb.ogtpenjd@gmail... vnsmwqhb ogtpenjd GRP_0
7545 wvdxnkhf jirecvta locked out himself from wind... user id : owenghyga\nhe was locked out while u... wvdxnkhf jirecvta GRP_0
7549 crm - unsafe web-side after pw-update i've tried to lo log-on to crm... atlwdyej vtlhzbix GRP_0
7565 mail address of cytohwau qfunricw at ticketing... \r\n\r\nreceived from: qdxyifhj.zbwtunpy@gmail... qdxyifhj zbwtunpy GRP_34
7572 答复: email address link to delivery not 转... \r\n\r\nreceived from: jkmeusfq.vjpckzsa@gmail... jkmeusfq vjpckzsa GRP_18
7574 pc ewkw8111185 funktioniert nicht (pc lässt s... von: cytohwau qfunricw \ngesendet: donnerstag,... cytohwau qfunricw GRP_33
7577 tablet - dell 7350 -无法连接无线wifi网ç... please provide details of the issue.\r\n无法... dmexgspl mruzqhac GRP_31
7581 vpn 不能登录,网页一直在检查防病... vpn 不能登录,网页一直在检查防病... eokwmfay ewbltgha GRP_31
7582 文件无法通过打印机fe08打印 文件无法通过打印机fe08打印 omatlyrd bvraipdt GRP_30
7586 windows 系统无法登录提示计算机与ä¸... windows 系统无法登录提示计算机与ä¸... gdyicrel kvtlodsx GRP_31
7587 erp purchasing error when i tried to submit the "describe what you ... kyagjxdh dmtjpbnz GRP_29
7588 显示器不亮 早上开机后显示器不出图像。 wgmqlnzh vpebwoat GRP_30
7598 office 365 sprache ändern/ videos aus "thehub... hallo, ist es möglich das ich office365 von e... xbyudksw zbfxlmus GRP_0
7631 reset microsoft online services password for ... from: microsoft on behalf of company inc. [mai... rdwpangu lybaxonw GRP_0
7644 phone display from: judi elituytt \r\nsent: saturday, august... nmzfdlar whzbrusx GRP_37
7677 company mails on mobile phone \r\n\r\nreceived from: ctuodmai.vguwqjtd@gmail... ctuodmai vguwqjtd GRP_0
7705 news can not be opened \r\n\r\nreceived from: uwofavej.hxyatnjc@gmail... uwofavej hxyatnjc GRP_0
7710 12th aug ,friday on-call - basis \n\nreceived from: mnxbeuso.rfmdlwuo@gmail.com... mnxbeuso rfmdlwuo GRP_0
7715 ak - crm in outlook not working \n\nreceived from: wdpzfqgi.zndgqcux@gmail.com... wdpzfqgi zndgqcux GRP_0
7717 setup laptop für sandstrahlgerät und robowor... setuplaptop für sandstrahlgerät und robowork... qidgvtwa qvbutayx GRP_24
7718 mobiltelefon lautsprecher / mikrofon defekt / ... durchwahl -477\r\nmobilteil gigaset sl2\r\nlau... tblmnxez ulcmryaf GRP_33
7719 can you please help \r\n\r\nreceived from: smktofel.etsoirbw@gmail... smktofel etsoirbw GRP_55
7725 problem in calling through skype \n\nreceived from: ftgvlneh.aitsgqwo@gmail.com... ftgvlneh aitsgqwo GRP_0
7732 recall: ticket_no1536413 -- comments added \r\n\r\nreceived from: franhtyu.liu@company.co... vblmcyax vlaorjum GRP_0
7753 erp produktion hängt bei ot03 aufträgen ot01 aufträge funktionieren, ot03 aufträge l... jftsvazy ypqmliwa GRP_0
7759 need ticket zmm_stock_transfer \n\nreceived from: dqplrwoy.cutpwjie@gmail.com... dqplrwoy cutpwjie GRP_29
7792 pc aufstellen bitte an smpijawb eawkpgqf weiterleiten:\r\nna... mvwiygou rpkscnlv GRP_42
7793 arbeitsplatz umziehen bitte an smpijawb eawkpgqf weiterleiten:\r\nal... mvwiygou rpkscnlv GRP_42
7805 skype funktionert nicht. from: zlnxswvp ptmzsbhk \r\nsent: thursday, au... zlnxswvp ptmzsbhk GRP_0
7807 datenübertragung \r\n\r\nreceived from: weszfyok.fbadnjhu@gmail... weszfyok fbadnjhu GRP_25
7809 international payments rejected for payment me... \r\nvon: pradyhtueep yyufs \r\ngesendet: 10 au... ubiqcrvy mxjcnqfs GRP_10
7825 please reset erp SID_34 password for user peil... \r\n\r\nreceived from: krdvgzeh.yboasemp@gmail... krdvgzeh yboasemp GRP_0
7829 erp/SID_34 - production orders cannot be print... error:\r\nverbindung zu system production_orde... slbfmqpa qdroplyz GRP_45
7830 restore some folders for stoebtrt \r\n\r\nreceived from: sxhcapoe.kbluefip@gmail... sxhcapoe kbluefip GRP_0
7833 error drucker \r\n\r\nreceived from: gsnuhpji.qpyfctwl@gmail... gsnuhpji qpyfctwl GRP_33
7845 read and write access to oe drive fürth \r\n\r\nreceived from: scjxobhd.ldypjkmf@gmail... scjxobhd ldypjkmf GRP_0
7847 issue on pricing in distributor_tool we have agreed price with many of the distribu... hbmwlprq ilfvyodx GRP_21
7873 error in accessing reporting_engineering_tool error in accessing reporting_engineering_tool\... urhpnlaf agmsfqil GRP_9
7900 zip code entered into erp account in SID_34 do... hello\r\naccount 81548372, zip code was update... ugephfta hrbqkvij GRP_40
7915 .netframdntyework 4.6.1 & business_client 5.0 ... \r\n\r\nreceived from: yqlvfkih.folbpugd@gmail... yqlvfkih folbpugd GRP_0
7928 install 64 bit version von ms office für pc 2... install 64 bit version von ms office für pc 2... njdrcagt shourxyp GRP_24
7931 outlock suche \r\n\r\nreceived from: giumxwvh.lfvwjtin@gmail... giumxwvh lfvwjtin GRP_28
7941 文件无法打印到打印机,提示打印æ... prtSID_737--文件无法打印到打印机,æ... rtjwbuev gfpwdetq GRP_31
7943 new prospect accounts created in crm do not ge... hello, \r\n\r\ni have two specific cases where... ugephfta hrbqkvij GRP_40
7948 engineering tool das passwort wurde meinerseits falsch eingegeb... gasbfqvp fmvqgjih GRP_0
7969 客户提供的在线系统打不开 客户提供的在线送货单生成系统打ä... fupikdoa gjkytoeh GRP_48
7996 security incidents - ( in33490582 ) : suspicio... source ip : 29.26.13.3095\r\nsystem name :Host... gzhapcld fdigznbk GRP_12
7997 security incidents - ( sw #in33544563 ) : poss... source ip : 45.25.35.0499\nsystem name : lpal9... ugyothfz ugrmkdhx GRP_2
8002 security incidents - ( sw #in33544563 ) : poss... source ip : 45.25.35.0499\r\nsystem name : lpa... ugyothfz ugrmkdhx GRP_62
8026 delivery failures date of email: 07/20/2016 (i have attached bot... kbnfxpsy gehxzayq GRP_26
8033 restore \r\n\r\nreceived from: trgqbeax.hfyzudql@gmail... trgqbeax hfyzudql GRP_12
8034 o outloock não esta funcionando. mensagem de erro:" infelizmente, o outlook enc... jidhewlg jufskody GRP_62
8049 revisar pc lpaw8515845 (g0rfv12) usadtto dfsdp... usar microsoft update \r\natualizar adobe acro... kbnfxpsy gehxzayq GRP_62
8068 business_client login hi,\r\n\r\nkindly help me out on access drawin... qxmujhwg rsgqidxw GRP_0
8097 aw: [ticket#2016080878000153] wg: po#4505612440 \r\n\r\nreceived from: qyndvmlw.imcvznow@gmail... qyndvmlw imcvznow GRP_32
8101 unable to open sales orders attachments - save... issue 1 - after erp update on 8.8.2016 (a syst... hpmwliog kqtnfvrl GRP_0
8103 wg: [ticket#2016080878000153] wg: po#4505612440 \r\n\r\nreceived from: tvmlrwkz.rsxftjep@gmail... tvmlrwkz rsxftjep GRP_32
8105 bios einstellung bitte an wzs 60 (wza, m14, m12.18990) bios so ... zxopwyak zrbfkimx GRP_33
8106 networking issue at customer site \r\n\r\nreceived from: vkzwibco.pueyvhoi@gmail... vkzwibco pueyvhoi GRP_19
8169 goods receipt issue hello, on po5616641196 the po was created for ... fonjtyhr wnclfztv GRP_29
8190 ingreso a business_client no puedo ingresar a business_client con mi con... igfbkzne duyinzmx GRP_0
8214 problem with EU_tool hello altogether\r\n\r\nwe have problems with ... mstnjfai xcobykhl GRP_0
8216 problem with EU_tool \r\n\r\nreceived from: mstnjfai.xcobykhl@gmail... mstnjfai xcobykhl GRP_25
8230 investment antrag mnakehrf mvunqihf - new lapt... \r\n\r\nreceived from: jywvemun.qngschtz@gmail... jywvemun qngschtz GRP_28
8232 stepfhryhan needs access to below collaboratio... stepfhryhan needs access to below collaboratio... nizholae bjnqikym GRP_0
8238 probleme mit drucker in löwe uacyltoe hxgaycz... probleme mit drucker in löwe uacyltoe hxgaycz... bejvhsfx dmvsclhp GRP_24
8242 telefon hörer am apparat fertigung halle c de... einlasten bei it-plant germany dtrvxiuq bwuqdtfo GRP_33
8244 now acces to the internet - webside allways ... \r\n\r\nreceived from: blktuiae.jzakfmhw@gmail... blktuiae jzakfmhw GRP_0
8266 erp无法进行采购(转给贺正平) 进行采购时显示"找不到员工111115483... kyagjxdh dmtjpbnz GRP_30
8270 密码更改后,outlook无法登陆 密码更改后,outlook无法登陆 yamndlrz nzcvdmtr GRP_30
8322 designation change required in outlook. \r\n-------- original message --------\r\nfrom... bvlcarfe aztlkeif GRP_2
8370 update of ae to bw and hana wiksufty, jimdghty l manager – so i believe ... sholvcmf bjtpomrl GRP_9
8371 request to reset microsoft online services pas... from: microsoft on behalf of company inc. [mai... kzeqbica kzcjeiyd GRP_0
8412 supply_chain_software login \n\nreceived from: ryafbthn.mijhmiles@company.... gbfdksqi whjtkilf GRP_0
8414 langsamer rechner \überprüfung \niptbwdq cse... langsamer rechner \überprüfung \niptbwdq cse... niptbwdq csenjruz GRP_24
8420 mobiltelefon defekt mobiltelefon gigaset m2 professional mit der d... ptvdxwla tlevwmzo GRP_33
8422 driver update \r\n\r\nreceived from: kbdljsxf.kcmqtjgf@gmail... kbdljsxf kcmqtjgf GRP_19
8425 ich kann mein erp passwort nicht zurück setzten. ich weiß mein erp passwort nicht mehr und hab... wfbkucds qaxhbois GRP_0
8427 new employee not able to login to system vvrtg... user id : vvrtgwildj\nname : johghajknnes wil... wczrtsja crwioekx GRP_0
8436 bls (beschichtungsleitstand) germany workflow ... error message: "laufzeitfehler '94': ungültig... xsjqhdgp ymstzudl GRP_25
8439 der drucker für die ups-lapels druckt nicht r... der drucker steht am platz von wckrxovs aunsgzmd kiqrvwat gwkpxzyt GRP_33
8457 无法登陆hr_tool考勤系统 显示java插件无法加载,所需版本1.8... kyagjxdh dmtjpbnz GRP_30
8465 vpn 连接不上 vpn连不上,请转给 贺正平 atcbvglq.bd... fcvqezmn frzyjknu GRP_30
8467 hi it help team, please unblock my new compan... from: ntydihzo aeptfbgs \r\nsent: friday, augu... ntydihzo aeptfbgs GRP_0
8470 please review your recent ticketing_tool ticke... from: mikhghytr wafglhdrhjop \nsent: thursday,... azxhejvq fyemlavd GRP_16
8471 电脑开机开不出来 to 小贺,早上电脑开机开不出来 xqyjztnm onfusvlz GRP_30
8480 customer group enhanced field \r\n\r\nreceived from: nlearzwi.ukdzstwi@gmail... nlearzwi ukdzstwi GRP_9
8498 machine não está funcionando i am unable to access the machine utilities to... ufawcgob aowhxjky GRP_62
8499 an mehreren pc`s lassen sich verschiedene prgr... an mehreren pc`s lassen sich verschiedene prgr... kqvbrspl jyzoklfx GRP_49

6.1.2 Applying the function on Short Description and Description Class of the dataset

In [ ]:
dataset1['Short description']=dataset1['Short description'].apply(fix_text)
dataset1['Description']=dataset1['Description'].apply(fix_text)

6.1.3 Sample record to check if the function of Mojibake got applyed correctly

In [ ]:
dataset1.loc[7581]
Out[ ]:
Short description    vpn 不能登录,网页一直在检查防病毒软件。
Description          vpn 不能登录,网页一直在检查防病毒软件。
Caller                    eokwmfay ewbltgha
Assignment group                     GRP_31
Name: 7581, dtype: object

6.2 Language Detection

In [ ]:
def fn_lan_detect(df):                                        
   try:                                                          
      return detect(df)                                      
   except:                                                       
      return 'no'

6.2.2 Adding a new Column 'Language' and applying the function on Description Class. 'Language' column will be updated with language of that row.

In [ ]:
dataset1['Language'] = dataset1['Description'].apply(fn_lan_detect)

6.2.3 After detecting the languages, it was found that English was the most widely used language followed by German.

Below graph displays the distribution of Languages

In [ ]:
x = dataset1["Language"].value_counts()
x=x.sort_values(ascending=False)
plt.figure(figsize=(10,6))
ax= sns.barplot(x.index, x.values, alpha=0.8)
plt.title("Distribution of text by language")
plt.ylabel('number of records')
plt.xlabel('Language')
rects = ax.patches
labels = x.values
for rect, label in zip(rects, labels):
    height = rect.get_height()
    ax.text(rect.get_x() + rect.get_width()/2, height + 5, label, ha='center', va='bottom')
plt.show();

6.2.4 Since English has the max count, below graph displays the distribution of all Non English Languages

In [ ]:
dataset1_nonEnglish = pd.DataFrame(dataset1[dataset1["Language"]!='en'])

x = dataset1_nonEnglish["Language"].value_counts()
x=x.sort_values(ascending=False)
plt.figure(figsize=(10,6))
ax= sns.barplot(x.index, x.values, alpha=0.8)
plt.title("Distribution of text by Non English language")
plt.ylabel('number of records')
plt.xlabel('Language')
rects = ax.patches
labels = x.values
for rect, label in zip(rects, labels):
    height = rect.get_height()
    ax.text(rect.get_x() + rect.get_width()/2, height + 5, label, ha='center', va='bottom')
plt.show();
In [ ]:
dataset1['Language'].value_counts()
Out[ ]:
en       6956
de        432
af        256
it        133
fr        103
no         73
sv         61
nl         57
zh-cn      56
es         55
ca         37
pl         29
da         29
pt         22
ro         17
ko         15
et         13
sl         12
tl         11
cy          9
sq          8
hr          7
fi          5
id          5
so          4
lt          3
hu          2
vi          2
cs          1
tr          1
sw          1
lv          1
sk          1
Name: Language, dtype: int64

Also, converting the whole data of column into lower case

6.3.1 Below function will clean the data of given column and return the dataset

In [ ]:
email_regex = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
In [ ]:
def fn_remove_irrelaventWords(df,columnName):
  for index in range(df.shape[0]):
    df[columnName][index] = df[columnName][index].lower()                                       # to lower case 
    df[columnName][index] = re.sub(email_regex,"",df.loc[index,columnName])                     # remove email address
    df[columnName][index] = re.sub(r'\S*@\S*\s?', '', df.loc[index,columnName])                 # remove email address with appended text
    df[columnName][index] = re.sub(r"received from:",' ',df.loc[index,columnName])              # remove unwanted text
    df[columnName][index] = re.sub(r"from:",' ',df.loc[index,columnName])                       # remove unwanted text
    df[columnName][index] = re.sub(r"to:",' ',df.loc[index,columnName])                         # remove unwanted text
    df[columnName][index] = re.sub(r"subject:",' ',df.loc[index,columnName])                    # remove unwanted text  
    df[columnName][index] = re.sub(r"sent:",' ',df.loc[index,columnName])                       # remove unwanted text
    df[columnName][index] = re.sub(r"ic:",' ',df.loc[index,columnName])                         # remove unwanted text
    df[columnName][index] = re.sub(r"cc:",' ',df.loc[index,columnName])                         # remove unwanted text
    df[columnName][index] = re.sub(r"bcc:",' ',df.loc[index,columnName])                        # remove unwanted text  
    df[columnName][index] = re.sub(r'\d+','' ,df.loc[index,columnName])                         # remove numbers
    df[columnName][index] = re.sub(r'\n',' ',df.loc[index,columnName])                          # remove new line character
    df[columnName][index] = re.sub(r'#','', df.loc[index,columnName])                           # remove hashtag while keeping hashtag text
    df[columnName][index] = re.sub(r'&;?', 'and',df.loc[index,columnName])                      # remove &
    df[columnName][index] = re.sub(r'\&\w*;', '', df.loc[index,columnName])                     # remove HTML special entities (e.g. &amp;)
    df[columnName][index] = re.sub(r'https?:\/\/.*\/\w*', '', df.loc[index,columnName])         # remove hyperlinks
    df[columnName][index] = re.sub(r"hello",' ',df.loc[index,columnName])                       # remove 'hello' word
    df[columnName][index] = re.sub(r"_",' ',df.loc[index,columnName])                           # remove _
    df[columnName][index] = re.sub(r"-",' ',df.loc[index,columnName])                           # remove -
    df[columnName][index] = re.sub(r",",' ',df.loc[index,columnName])
    df[columnName][index] = re.sub(r"\(",' ',df.loc[index,columnName])
    df[columnName][index] = re.sub(r"\)",' ',df.loc[index,columnName])
  return df
In [ ]:
df_clean = dataset1.reset_index()
In [ ]:
df_clean.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8417 entries, 0 to 8416
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   index              8417 non-null   int64 
 1   Short description  8417 non-null   object
 2   Description        8417 non-null   object
 3   Caller             8417 non-null   object
 4   Assignment group   8417 non-null   object
 5   Language           8417 non-null   object
dtypes: int64(1), object(5)
memory usage: 394.7+ KB

6.3.2 Applying the function on Description and Short Description Class

In [ ]:
df_clean = fn_remove_irrelaventWords(df_clean,'Description')
df_clean = fn_remove_irrelaventWords(df_clean,'Short description')
In [ ]:
df_clean.tail(20)
Out[ ]:
index Short description Description Caller Assignment group Language
8397 8480 customer group enhanced field a business decision has recently bee... nlearzwi ukdzstwi GRP_9 en
8398 8481 ess portal hi team i was going into the ess file ... eagvusbr nguqityl GRP_9 en
8399 8482 robot hostname is inactive robot hostname is inactive rkupnshb gsmzfojw GRP_8 en
8400 8483 fw: case id [ref: case :ref] :: others pacvbetl yptglhoe thursday august : p... pacvbetl yptglhoe GRP_0 en
8401 8484 please remove user hugcadrn ixhlwdgt ralfteim... please remove user hugcadrn ixhlwdgt ralfteim... hugcadrn ixhlwdgt GRP_2 en
8402 8485 ticket update on inc to user hbmwlprq ilfvyodx ticket update on inc to user hbmwlprq ilfvyodx fumkcsji sarmtlhy GRP_0 en
8403 8486 ticket update on ticket no ticket update on ticket no fumkcsji sarmtlhy GRP_0 sv
8404 8487 telephony software is missing from pc pc received multiple windows security updates ... pvbomqht smfkuhwi GRP_3 en
8405 8488 erp account unlock name:mfeyouli ndobtzpw language: browser:micro... rbozivdq gmlhrtvp GRP_0 en
8406 8489 account locked account locked sdvlxbfe ptnahjkw GRP_0 en
8407 8490 check status in purchasing please contact ed pasgryowski pasgryo about ... mpihysnw wrctgoan GRP_29 en
8408 8491 vpn for laptop i need a vpn for my new laptop. name llv... jxgobwrm qkugdipo GRP_34 en
8409 8492 hr tool etime option not visitble hr tool etime option not visitble tmopbken ibzougsd GRP_0 en
8410 8493 erp fi ob two accounts to be added i am sorry i have another two accounts that n... ipwjorsc uboapexr GRP_10 en
8411 8494 tablet needs reimaged due to multiple issues w... tablet needs reimaged due to multiple issues w... cpmaidhj elbaqmtp GRP_3 en
8412 8495 emails not coming in from zz mail good afternoon i am not receiving the e... avglmrts vhqmtiua GRP_29 en
8413 8496 telephony software issue telephony software issue rbozivdq gmlhrtvp GRP_0 en
8414 8497 vip: windows password reset for tifpdchb pedxruyf vip: windows password reset for tifpdchb pedxruyf oybwdsgx oxyhwrfz GRP_0 en
8415 8498 machine não está funcionando i am unable to access the machine utilities to... ufawcgob aowhxjky GRP_62 en
8416 8499 an mehreren pc`s lassen sich verschiedene prgr... an mehreren pc`s lassen sich verschiedene prgr... kqvbrspl jyzoklfx GRP_49 de
In [ ]:
df_clean = df_clean.drop(columns=['index'],axis=1)
df_clean.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8417 entries, 0 to 8416
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Short description  8417 non-null   object
 1   Description        8417 non-null   object
 2   Caller             8417 non-null   object
 3   Assignment group   8417 non-null   object
 4   Language           8417 non-null   object
dtypes: object(5)
memory usage: 328.9+ KB

6.3.3 In the Short Description and Description, it was observed the Caller Name were also present which is not relevant as far as passing the data into Model is concerned.

Below function will remove the Caller Name from a given column of that row

In [ ]:
def fn_removeCaller(df,columnName):
  for index in range(df.shape[0]):
    if (df['Caller'][index] in df[columnName][index]):
      df[columnName][index] = df[columnName][index].replace(df['Caller'][index],"person")
    if (df['Caller'][index].replace(" ","") in df[columnName][index]):
      df[columnName][index] = df[columnName][index].replace(df['Caller'][index].replace(" ",""),"person")
  return df
In [ ]:
df_clean = fn_removeCaller(df_clean,'Description')
df_clean = fn_removeCaller(df_clean,'Short description')
In [ ]:
df_clean.tail(10)
Out[ ]:
Short description Description Caller Assignment group Language
8407 check status in purchasing please contact ed pasgryowski pasgryo about ... mpihysnw wrctgoan GRP_29 en
8408 vpn for laptop i need a vpn for my new laptop. name llv... jxgobwrm qkugdipo GRP_34 en
8409 hr tool etime option not visitble hr tool etime option not visitble tmopbken ibzougsd GRP_0 en
8410 erp fi ob two accounts to be added i am sorry i have another two accounts that n... ipwjorsc uboapexr GRP_10 en
8411 tablet needs reimaged due to multiple issues w... tablet needs reimaged due to multiple issues w... cpmaidhj elbaqmtp GRP_3 en
8412 emails not coming in from zz mail good afternoon i am not receiving the e... avglmrts vhqmtiua GRP_29 en
8413 telephony software issue telephony software issue rbozivdq gmlhrtvp GRP_0 en
8414 vip: windows password reset for tifpdchb pedxruyf vip: windows password reset for tifpdchb pedxruyf oybwdsgx oxyhwrfz GRP_0 en
8415 machine não está funcionando i am unable to access the machine utilities to... ufawcgob aowhxjky GRP_62 en
8416 an mehreren pc`s lassen sich verschiedene prgr... an mehreren pc`s lassen sich verschiedene prgr... kqvbrspl jyzoklfx GRP_49 de

6.3.4 Exporting the clean dataset and saving it for further processing

In [ ]:
df_clean.to_excel("df_clean_LangDetected.xlsx")

6.4 Language Translation

6.4.1 Using the GoSlate Library, we have Translated all the languages to English and verified it using Google Translator Directly importing the Translated dataset

In [ ]:
# svc_domains = ['.com','.com.au','.com.ar','.co.kr','.co.in','.co.jp','.at','.de','.ru','.ch','.fr','.es','.ae']
# svc_urls = ['http://translate.google' + domain for domain in svc_domains]
In [ ]:
# gs = Goslate(service_urls=svc_urls)
# trans_8416 = gs.translate(df_clean['Description'][8416], target_language='en', source_language='auto')

6.4.2 Sample of Language Translation of record.

In [ ]:
# print ('Original Text : ',df_clean['Description'][8416])
# print('Traslated to English : ',trans_8416)
Original Text :  an mehreren pc`s lassen sich verschiedene prgramdntyme nicht öffnen. bereich cnc.
Traslated to English :  Several prgramdntyme can not be opened on several PCs. Area CNC.
In [ ]:
# def fn_ConvertToEnglish(df,columnName):
#  for idx in range(df.shape[0]):
#    row_iter = gs.translate(df[columnName][idx],target_language='en',source_language = 'auto')
#    df[columnName][idx] = str(row_iter)
#  return df
In [ ]:
# df_lang = fn_ConvertToEnglish(df_clean,'Description')
# df_lang = fn_ConvertToEnglish(df_clean,'Short description')

6.4.3 Exporting the dataset with all the records Translated to English for the Description and Short Description class.

In [ ]:
#df_lang.to_excel("df_clean_LangDetected_Translated.xlsx")
In [ ]:
df_lang = pd.read_excel("/content/sample_data/df_clean_LangDetected_Translated.xlsx")
In [ ]:
df_lang.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8417 entries, 0 to 8416
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Unnamed: 0         8417 non-null   int64 
 1   Short description  8417 non-null   object
 2   Description        8417 non-null   object
 3   Caller             8417 non-null   object
 4   Assignment group   8417 non-null   object
 5   Language           8417 non-null   object
dtypes: int64(1), object(5)
memory usage: 394.7+ KB
In [ ]:
df_lang = df_lang.drop(columns=['Unnamed: 0'],axis=1)
In [ ]:
df_lang.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8417 entries, 0 to 8416
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Short description  8417 non-null   object
 1   Description        8417 non-null   object
 2   Caller             8417 non-null   object
 3   Assignment group   8417 non-null   object
 4   Language           8417 non-null   object
dtypes: object(5)
memory usage: 328.9+ KB
In [ ]:
df_lang[pd.isnull(df_lang).any(axis=1)]
Out[ ]:
Short description Description Caller Assignment group Language
In [ ]:
df_lang.fillna(str(), inplace=True)
df_lang.isnull().sum()
Out[ ]:
Short description    0
Description          0
Caller               0
Assignment group     0
Language             0
dtype: int64

7. Merging Short Description and Description column to a new column 'combined_description'

In [ ]:
df_lang.insert(loc=4, 
              column='combined_description', 
              allow_duplicates=True, 
              value=list(df_lang['Short description'].str.strip() + ' ' + df_lang['Description'].str.strip()))
In [ ]:
df_lang.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8417 entries, 0 to 8416
Data columns (total 6 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Short description     8417 non-null   object
 1   Description           8417 non-null   object
 2   Caller                8417 non-null   object
 3   Assignment group      8417 non-null   object
 4   combined_description  8417 non-null   object
 5   Language              8417 non-null   object
dtypes: object(6)
memory usage: 394.7+ KB
In [ ]:
df_lang.head()
Out[ ]:
Short description Description Caller Assignment group combined_description Language
0 login issue verified user details. employee and manager na... spxjnwir pjlcoqds GRP_0 login issue verified user details. employee an... en
1 outlook team my meetings/skype meetings etc are not ap... hmjdrvpb komuaywn GRP_0 outlook team my meetings/skype meetings etc ar... en
2 cant log in to vpn hi i cannot log on to vpn best eylqgodm ybqkwiam GRP_0 cant log in to vpn hi i cannot log on to vpn best en
3 unable to access hr tool page unable to access hr tool page xbkucsvz gcpydteq GRP_0 unable to access hr tool page unable to access... en
4 skype error skype error owlgqjme qhcozdfx GRP_0 skype error skype error no

7.1 After mering and applying functions of preprocessing of data, it was observed few more irrelavant text hence created below function to remove such words

In [ ]:
def fn_remove_irrelaventWords_LevelTwo(df,columnName):
  for index in range(df.shape[0]):
    df[columnName][index] = df[columnName][index].lower()                                       # to lower case 
    df[columnName][index] = df[columnName][index].replace("// ::",' ')
    df[columnName][index] = df[columnName][index].replace("<",' ')
    df[columnName][index] = df[columnName][index].replace(">",' ')
    df[columnName][index] = df[columnName][index].replace(";",' ')
    df[columnName][index] = df[columnName][index].replace(".",' ')
    df[columnName][index] = df[columnName][index].replace("•",' ')
    df[columnName][index] = df[columnName][index].replace("?",' ')
    df[columnName][index] = df[columnName][index].replace("\\",' ')
    df[columnName][index] = df[columnName][index].replace("\/",' ')
    df[columnName][index] = df[columnName][index].replace(":",' ',)
    df[columnName][index] = df[columnName][index].replace("%",' ',)
    df[columnName][index] = df[columnName][index].replace("=",' ',)
    df[columnName][index] = df[columnName][index].replace("[mail ]",' ')
    df[columnName][index] = df[columnName][index].replace("[",' ')
    df[columnName][index] = df[columnName][index].replace("]",' ')
    df[columnName][index] = df[columnName][index].replace("< mail >",' ')
    df[columnName][index] = df[columnName][index].replace("+",' ')
    df[columnName][index] = df[columnName][index].replace("\"",' ')
    df[columnName][index] = df[columnName][index].replace("' ",' ')
    df[columnName][index] = df[columnName][index].replace(" '",' ')
    df[columnName][index] = df[columnName][index].replace("* * * ",' ')
    df[columnName][index] = df[columnName][index].replace(" * * *",' ')
    df[columnName][index] = df[columnName][index].replace("* ",' ')
    df[columnName][index] = df[columnName][index].replace(" *",' ')
    df[columnName][index] = df[columnName][index].replace("/ ",' ')
    df[columnName][index] = df[columnName][index].replace("撤回 ",' ')
    df[columnName][index] = df[columnName][index].replace("答复 ",' ')
    df[columnName][index] = df[columnName][index].replace("*",' ')
    df[columnName][index] = df[columnName][index].replace("/",' ')
  return df
In [ ]:
df_lang_clean = fn_remove_irrelaventWords_LevelTwo(df_lang,"combined_description")
In [ ]:
df_lang_clean.head(20)
Out[ ]:
Short description Description Caller Assignment group combined_description Language
0 login issue verified user details. employee and manager na... spxjnwir pjlcoqds GRP_0 login issue verified user details employee an... en
1 outlook team my meetings/skype meetings etc are not ap... hmjdrvpb komuaywn GRP_0 outlook team my meetings skype meetings etc ar... en
2 cant log in to vpn hi i cannot log on to vpn best eylqgodm ybqkwiam GRP_0 cant log in to vpn hi i cannot log on to vpn best en
3 unable to access hr tool page unable to access hr tool page xbkucsvz gcpydteq GRP_0 unable to access hr tool page unable to access... en
4 skype error skype error owlgqjme qhcozdfx GRP_0 skype error skype error no
5 unable to log in to engineering tool and skype unable to log in to engineering tool and skype eflahbxn ltdgrvkz GRP_0 unable to log in to engineering tool and skype... en
6 event: critical:hostname .company.com the valu... event: critical:hostname .company.com the valu... jyoqwxhz clhxsoqy GRP_1 event critical hostname company com the valu... en
7 ticket no employment status new non employee [... ticket no employment status new non employee [... eqzibjhw ymebpoih GRP_0 ticket no employment status new non employee ... en
8 unable to disable add ins on outlook unable to disable add ins on outlook mdbegvct dbvichlg GRP_0 unable to disable add ins on outlook unable to... en
9 ticket update on inplant ticket update on inplant fumkcsji sarmtlhy GRP_0 ticket update on inplant ticket update on inplant en
10 engineering tool says not connected and unable... engineering tool says not connected and unable... badgknqs xwelumfz GRP_0 engineering tool says not connected and unable... en
11 hr tool site not loading page correctly hr tool site not loading page correctly dcqsolkx kmsijcuz GRP_0 hr tool site not loading page correctly hr too... en
12 unable to login to hr tool to sgxqsuojr xwbeso... unable to login to hr tool to sgxqsuojr xwbeso... oblekmrw qltgvspb GRP_0 unable to login to hr tool to sgxqsuojr xwbeso... en
13 user wants to reset the password user wants to reset the password iftldbmu fujslwby GRP_0 user wants to reset the password user wants to... en
14 unable to open payslips unable to open payslips epwyvjsz najukwho GRP_0 unable to open payslips unable to open payslips es
15 ticket update on inplant ticket update on inplant fumkcsji sarmtlhy GRP_0 ticket update on inplant ticket update on inplant en
16 unable to login to company vpn hi i am unable to login to company vpn website... chobktqj qdamxfuc GRP_0 unable to login to company vpn hi i am unable ... en
17 when undocking pc screen will not come back when undocking pc screen will not come back sigfdwcj reofwzlm GRP_3 when undocking pc screen will not come back wh... en
18 erp sid account locked erp sid account locked nqdyowsm yqerwtna GRP_0 erp sid account locked erp sid account locked en
19 unable to sign into vpn unable to sign into vpn ftsqkvre bqzrupic GRP_0 unable to sign into vpn unable to sign into vpn it

7.1.1 Exporting the clean data to excel

In [ ]:
df_lang_clean.to_excel("df_Moji_langDet_Translated_clean_combinedDesc.xlsx")

8. Deterministic Rules

8.1 After analysis of the data, it was observed that few records has some patterns and keywords which gets assigned to a specific Group only.

Below function finds and assign groups based on Rules

In [ ]:
def deterministicRules(df,columnName):
  for i in range(df.shape[0]):
      #1 Contains telephony_software > GRP_7
    if pd.notna(df[columnName][i]):
      if ('telephony software' in df[columnName][i]):
        df['pred_group'][i] = 'GRP_7'
      #2 contains cutview >  GRP_66
      elif ('cutview' in df[columnName][i]):
        df['pred_group'][i] = 'GRP_66'
      #3 contains engg application >  GRP_58
      elif ('engg application' in df[columnName][i]):
        df['pred_group'][i] = 'GRP_58'
      #4 contains ethics >  GRP_23
      elif ('ethics' in df[columnName][i]):
        df['pred_group'][i] = 'GRP_23'
      # contains crm dynamics >  GRP_22
      elif ('crm dynamics' in df[columnName][i]):
        df['pred_group'][i] = 'GRP_22'
      # contains distributor tool & company center >  GRP_21
      elif ('distributor tool' in df[columnName][i]):
        df['pred_group'][i] = 'GRP_21'
      elif ('company center' in df[columnName][i]):
        df['pred_group'][i] = 'GRP_21'
      # contains bpctwhsn kzqsbmtp & network outage or circuit outage >  GRP_8
      elif ((df['Caller'][i] == 'bpctwhsn kzqsbmtp' and ('network outage' in df[columnName][i]) or 'circuit outage' in df[columnName][i])):
        df['pred_group'][i] = 'GRP_8'
      elif ('reset passwords' in df[columnName][i] and 'the' in df[columnName][i]):
        df['pred_group'][i] = 'GRP_17'
      elif (df[columnName][i].startswith('erp access issue')):
        df['pred_group'][i] = 'GRP_2'
      elif ('vsphere' in df[columnName][i] or 'esxi' in df[columnName][i]):
        df['pred_group'][i] = 'GRP_12'
      elif ('windows account' in df[columnName][i]):
        df['pred_group'][i] = 'GRP_0'
      elif ('erp sid account lock' in df[columnName][i]):
        df['pred_group'][i] = 'GRP_0'
      elif ('erp sid password reset' in df[columnName][i]):
        df['pred_group'][i] = 'GRP_0'
      elif(df['Caller'][i] == 'jionmpsf wnkpzcmv' and 'eutool' in df[columnName][i]):
        df['pred_group'][i] = 'GRP_24'
      elif(df['Caller'][i] == 'cwrikael oanmsecr' and 'eutool' in df[columnName][i]):
        df['pred_group'][i] = 'GRP_0'
      elif ('sso portal' in df[columnName][i]):
        df['pred_group'][i] = 'GRP_73'
      elif ('unable complete forecast' in df[columnName][i]):
        df['pred_group'][i] = 'GRP_67'
      elif (df[columnName][i].startswith('timecard') or df[columnName][i].startswith('time card')):
        df['pred_group'][i] = 'GRP_36'

8.2 Adding a new column in the data set 'pred_group' which will get populated when above function is executed

In [ ]:
df_lang_clean.insert(loc=5,column='pred_group',value=np.nan,allow_duplicates=True)
In [ ]:
df_lang_clean.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8417 entries, 0 to 8416
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Short description     8417 non-null   object 
 1   Description           8417 non-null   object 
 2   Caller                8417 non-null   object 
 3   Assignment group      8417 non-null   object 
 4   combined_description  8417 non-null   object 
 5   pred_group            0 non-null      float64
 6   Language              8417 non-null   object 
dtypes: float64(1), object(6)
memory usage: 460.4+ KB
In [ ]:
deterministicRules(df_lang_clean,"combined_description")
df_determinted = df_lang_clean[~df_lang_clean['pred_group'].isna()]

8.2.1 After applying Deterministic rule, 557 records got ruled out from going into the Model

In [ ]:
df_determinted.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 557 entries, 18 to 8413
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Short description     557 non-null    object
 1   Description           557 non-null    object
 2   Caller                557 non-null    object
 3   Assignment group      557 non-null    object
 4   combined_description  557 non-null    object
 5   pred_group            557 non-null    object
 6   Language              557 non-null    object
dtypes: object(7)
memory usage: 34.8+ KB
In [ ]:
df_determinted.head()
Out[ ]:
Short description Description Caller Assignment group combined_description pred_group Language
18 erp sid account locked erp sid account locked nqdyowsm yqerwtna GRP_0 erp sid account locked erp sid account locked GRP_0 en
25 erp sid password reset erp sid password reset jqhtkfsm xoehtbnl GRP_0 erp sid password reset erp sid password reset GRP_0 af
49 status does not change on telephony software when closing a call the agent keeps on the "on... aofnvyzt eqiyskhm GRP_7 status does not change on telephony software w... GRP_7 en
80 erp sid account locked erp sid account locked ilvortuq zfgrlewm GRP_0 erp sid account locked erp sid account locked GRP_0 en
154 reset passwords for person using password mana... employee is getting an error "user authenticat... hckvpary emxbpkwy GRP_17 reset passwords for person using password mana... GRP_17 en

8.3 Extracting those records for which Deterministic Rule didnt got applied.

In [ ]:
df_NonDet = df_lang_clean[df_lang_clean['pred_group'].isna()]
In [ ]:
df_NonDet = df_NonDet.reset_index()
df_NonDet = df_NonDet.drop(columns=['index'],axis=1)
df_NonDet.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7860 entries, 0 to 7859
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Short description     7860 non-null   object
 1   Description           7860 non-null   object
 2   Caller                7860 non-null   object
 3   Assignment group      7860 non-null   object
 4   combined_description  7860 non-null   object
 5   pred_group            0 non-null      object
 6   Language              7860 non-null   object
dtypes: object(7)
memory usage: 430.0+ KB

8.4 Mering of Groups

We have Group till GRP_72. Now assigning and merging all groups to GRP_99 which are having count of records <=10

In [ ]:
df_NonDet.insert(loc=4,column='New Assignment Group',value=np.nan,allow_duplicates=True)
In [ ]:
groupsToBeMerged = pd.DataFrame(df_NonDet['Assignment group'].value_counts() <=10)
groupsToBeMerged = groupsToBeMerged[groupsToBeMerged['Assignment group'] == True]
groupsToBeMergedList = list(groupsToBeMerged.index)
groupsToBeMergedList
Out[ ]:
['GRP_52',
 'GRP_51',
 'GRP_55',
 'GRP_65',
 'GRP_59',
 'GRP_49',
 'GRP_46',
 'GRP_43',
 'GRP_32',
 'GRP_68',
 'GRP_63',
 'GRP_56',
 'GRP_38',
 'GRP_69',
 'GRP_54',
 'GRP_57',
 'GRP_71',
 'GRP_72',
 'GRP_23',
 'GRP_70',
 'GRP_67',
 'GRP_58',
 'GRP_61',
 'GRP_66',
 'GRP_64',
 'GRP_35']
In [ ]:
len(groupsToBeMergedList)
Out[ ]:
26
In [ ]:
df_NonDet.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7860 entries, 0 to 7859
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Short description     7860 non-null   object 
 1   Description           7860 non-null   object 
 2   Caller                7860 non-null   object 
 3   Assignment group      7860 non-null   object 
 4   New Assignment Group  0 non-null      float64
 5   combined_description  7860 non-null   object 
 6   pred_group            0 non-null      object 
 7   Language              7860 non-null   object 
dtypes: float64(1), object(7)
memory usage: 491.4+ KB
In [ ]:
for index in range(df_NonDet.shape[0]):
  if (df_NonDet['Assignment group'][index] in groupsToBeMergedList):
    df_NonDet['New Assignment Group'][index] = 'GRP_99'
  else:
    df_NonDet['New Assignment Group'][index] = df_NonDet['Assignment group'][index]
In [ ]:
df_NonDet.tail(30)
Out[ ]:
Short description Description Caller Assignment group New Assignment Group combined_description pred_group Language
7830 hi it help team please unblock my new company ... Person Friday August: At NWFODMHC ExurcWKM WG:... ntydihzo aeptfbgs GRP_0 GRP_0 hi it help team please unblock my new company ... NaN en
7831 can you unblock my account so i can use outloo... nwfodmhc exurcwkm friday august : am prishry b... eqzibjhw ymebpoih GRP_0 GRP_0 can you unblock my account so i can use outloo... NaN en
7832 plant value added services one day pick route ... request to phase in additional vas customers. ... xnqzhtwu hivumtfz GRP_18 GRP_18 plant value added services one day pick route ... NaN en
7833 The computer can't open the computer TO Xiaohe morning computer can't open xqyjztnm onfusvlz GRP_30 GRP_30 the computer can't open the computer to xiaohe... NaN zh-cn
7834 ticket update rakthyesh ramdntythanjesh friday august : am u... eqzibjhw ymebpoih GRP_0 GRP_0 ticket update rakthyesh ramdntythanjesh friday... NaN en
7835 outlook freezing because of crm addin outlook freezing because of crm addin crjhotyk pxslorbe GRP_0 GRP_0 outlook freezing because of crm addin outlook ... NaN en
7836 inquiry about employee shesyhur posrt inquiry about employee shesyhur posrt pvlxjizg xzvlwqjc GRP_0 GRP_0 inquiry about employee shesyhur posrt inquiry ... NaN en
7837 etime time card update information. etime time card update information. tmopbken ibzougsd GRP_0 GRP_0 etime time card update information etime time... NaN it
7838 supply chain software account unlock and passw... supply chain software account unlock and passw... xjyuobma pzgqixlj GRP_0 GRP_0 supply chain software account unlock and passw... NaN en
7839 can't login to bex analyzer through vpn urgent best cfzsajbe lyejkdho GRP_0 GRP_0 can't login to bex analyzer through vpn urgent... NaN en
7840 beenefits access on oneteam yesterday it helped me access some additional ... bjitvswa yrmugfnq GRP_0 GRP_0 beenefits access on oneteam yesterday it helpe... NaN en
7841 unable to connect to hostname stehdgty jfhying called in for an issue where ... byfskuni mhvnqodk GRP_0 GRP_0 unable to connect to hostname stehdgty jfhying... NaN en
7842 customer group enhanced field a business decision has recently been made to ... nlearzwi ukdzstwi GRP_9 GRP_9 customer group enhanced field a business decis... NaN en
7843 Ess portal hi team i was going into the ess file and chec... eagvusbr nguqityl GRP_9 GRP_9 ess portal hi team i was going into the ess fi... NaN en
7844 robot hostname is inactive robot hostname is inactive rkupnshb gsmzfojw GRP_8 GRP_8 robot hostname is inactive robot hostname is i... NaN en
7845 fw: case id [ref: case :ref] :: others person thursday august : pm nwfodmhc exurcwkm ... pacvbetl yptglhoe GRP_0 GRP_0 fw case id ref case ref others person ... NaN en
7846 please remove user person ralfteimp from palo ... please remove user person ralfteimp from palo ... hugcadrn ixhlwdgt GRP_2 GRP_2 please remove user person ralfteimp from palo ... NaN en
7847 ticket update on inc to user hbmwlprq ilfvyodx ticket update on inc to user hbmwlprq ilfvyodx fumkcsji sarmtlhy GRP_0 GRP_0 ticket update on inc to user hbmwlprq ilfvyodx... NaN en
7848 ticket update on ticket no ticket update on ticket no fumkcsji sarmtlhy GRP_0 GRP_0 ticket update on ticket no ticket update on ti... NaN sv
7849 erp account unlock name:mfeyouli ndobtzpw language: browser:micro... rbozivdq gmlhrtvp GRP_0 GRP_0 erp account unlock name mfeyouli ndobtzpw lang... NaN en
7850 account locked account locked sdvlxbfe ptnahjkw GRP_0 GRP_0 account locked account locked NaN en
7851 check status in purchasing please contact ed pasgryowski pasgryo about hi... mpihysnw wrctgoan GRP_29 GRP_29 check status in purchasing please contact ed p... NaN en
7852 vpn for laptop i need a vpn for my new laptop. name llv kneth... jxgobwrm qkugdipo GRP_34 GRP_34 vpn for laptop i need a vpn for my new laptop ... NaN en
7853 hr tool etime option not visitble hr tool etime option not visitble tmopbken ibzougsd GRP_0 GRP_0 hr tool etime option not visitble hr tool etim... NaN en
7854 erp fi ob two accounts to be added i am sorry i have another two accounts that ne... ipwjorsc uboapexr GRP_10 GRP_10 erp fi ob two accounts to be added i am sorry ... NaN en
7855 tablet needs reimaged due to multiple issues w... tablet needs reimaged due to multiple issues w... cpmaidhj elbaqmtp GRP_3 GRP_3 tablet needs reimaged due to multiple issues w... NaN en
7856 emails not coming in from zz mail good afternoon i am not receiving the emails t... avglmrts vhqmtiua GRP_29 GRP_29 emails not coming in from zz mail good afterno... NaN en
7857 vip: windows password reset for tifpdchb pedxruyf vip: windows password reset for tifpdchb pedxruyf oybwdsgx oxyhwrfz GRP_0 GRP_0 vip windows password reset for tifpdchb pedxr... NaN en
7858 Machine is not working i am unable to access the machine utilities to... ufawcgob aowhxjky GRP_62 GRP_62 machine is not working i am unable to access t... NaN en
7859 Several prgramdntyme can not be opened on seve... Several prgramdntyme can not be opened on seve... kqvbrspl jyzoklfx GRP_49 GRP_99 several prgramdntyme can not be opened on seve... NaN de
In [ ]:
df_NonDet['New Assignment Group'].unique()
Out[ ]:
array(['GRP_0', 'GRP_1', 'GRP_3', 'GRP_4', 'GRP_5', 'GRP_6', 'GRP_8',
       'GRP_9', 'GRP_10', 'GRP_11', 'GRP_12', 'GRP_13', 'GRP_14',
       'GRP_15', 'GRP_16', 'GRP_18', 'GRP_19', 'GRP_2', 'GRP_20',
       'GRP_21', 'GRP_22', 'GRP_24', 'GRP_25', 'GRP_26', 'GRP_27',
       'GRP_28', 'GRP_29', 'GRP_30', 'GRP_31', 'GRP_17', 'GRP_33',
       'GRP_34', 'GRP_99', 'GRP_36', 'GRP_37', 'GRP_39', 'GRP_40',
       'GRP_41', 'GRP_42', 'GRP_7', 'GRP_44', 'GRP_45', 'GRP_47',
       'GRP_48', 'GRP_50', 'GRP_53', 'GRP_60', 'GRP_62'], dtype=object)
In [ ]:
df_NonDet['New Assignment Group'].value_counts()
Out[ ]:
GRP_0     3660
GRP_8      591
GRP_24     285
GRP_9      252
GRP_12     251
GRP_19     213
GRP_2      209
GRP_3      197
GRP_6      182
GRP_13     143
GRP_10     139
GRP_5      128
GRP_14     116
GRP_25     115
GRP_33     103
GRP_29      97
GRP_4       95
GRP_18      88
GRP_99      88
GRP_16      84
GRP_31      67
GRP_34      62
GRP_26      55
GRP_28      44
GRP_41      40
GRP_40      40
GRP_30      39
GRP_42      37
GRP_15      36
GRP_45      35
GRP_20      33
GRP_1       31
GRP_11      30
GRP_22      27
GRP_47      27
GRP_48      25
GRP_62      25
GRP_7       21
GRP_17      19
GRP_39      19
GRP_27      17
GRP_60      16
GRP_37      15
GRP_44      15
GRP_50      14
GRP_21      13
GRP_53      11
GRP_36      11
Name: New Assignment Group, dtype: int64

After Merging group, there were only 47 unique groups left and 26 groups got merged to GRP_99 which had count <=10.

9. Splitting the dataset into data for Machine Learning Model and Deep Learning Model.

Reason for this split is the further preprocessing required for ML Model which are not required for DL Model

In [ ]:
df_ML = df_NonDet.copy() # Creating copy 
df_DL = df_NonDet.copy()
In [ ]:
df_ML.shape, df_DL.shape
Out[ ]:
((7860, 8), (7860, 8))
In [ ]:
df_DL.to_excel("df_DL.xlsx")

9.1 Preprocessing Data for ML Model

9.1.1 Applying Stopwords Removal

In [ ]:
nltk.download('stopwords')
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
Out[ ]:
True
In [ ]:
stopwords = set(stopwords.words('english'))
 # Remove stopwords
df_ML['combined_description'] = df_ML['combined_description'].apply(lambda x: ' '.join([word for word in x.split() if word not in stopwords]))
In [ ]:
df_ML.head()
Out[ ]:
Short description Description Caller Assignment group New Assignment Group combined_description pred_group Language
0 login issue verified user details. employee and manager na... spxjnwir pjlcoqds GRP_0 GRP_0 login issue verified user details employee man... NaN en
1 outlook team my meetings/skype meetings etc are not ap... hmjdrvpb komuaywn GRP_0 GRP_0 outlook team meetings skype meetings etc appea... NaN en
2 cant log in to vpn hi i cannot log on to vpn best eylqgodm ybqkwiam GRP_0 GRP_0 cant log vpn hi cannot log vpn best NaN en
3 unable to access hr tool page unable to access hr tool page xbkucsvz gcpydteq GRP_0 GRP_0 unable access hr tool page unable access hr to... NaN en
4 skype error skype error owlgqjme qhcozdfx GRP_0 GRP_0 skype error skype error NaN no

9.1.2 Applyting Lemmatization of words

In [ ]:
# Initialize spacy 'en' medium model, keeping only tagger component needed for lemmatization
nlp = spacy.load('en', disable=['parser', 'ner'])

# Define a function to lemmatize the descriptions
def lemmatizer(sentence):
    # Parse the sentence using the loaded 'en' model object `nlp`
    doc = nlp(sentence)
    return " ".join([token.lemma_ for token in doc if token.lemma_ !='-PRON-'])
In [ ]:
df_ML['combined_description'] = df_ML['combined_description'].apply(lemmatizer)
In [ ]:
df_ML.head(10)
Out[ ]:
Short description Description Caller Assignment group New Assignment Group combined_description pred_group Language
0 login issue verified user details. employee and manager na... spxjnwir pjlcoqds GRP_0 GRP_0 login issue verify user detail employee manage... NaN en
1 outlook team my meetings/skype meetings etc are not ap... hmjdrvpb komuaywn GRP_0 GRP_0 outlook team meeting skype meeting etc appear ... NaN en
2 cant log in to vpn hi i cannot log on to vpn best eylqgodm ybqkwiam GRP_0 GRP_0 can not log vpn hi can not log vpn best NaN en
3 unable to access hr tool page unable to access hr tool page xbkucsvz gcpydteq GRP_0 GRP_0 unable access hr tool page unable access hr to... NaN en
4 skype error skype error owlgqjme qhcozdfx GRP_0 GRP_0 skype error skype error NaN no
5 unable to log in to engineering tool and skype unable to log in to engineering tool and skype eflahbxn ltdgrvkz GRP_0 GRP_0 unable log engineering tool skype unable log e... NaN en
6 event: critical:hostname .company.com the valu... event: critical:hostname .company.com the valu... jyoqwxhz clhxsoqy GRP_1 GRP_1 event critical hostname company com value moun... NaN en
7 ticket no employment status new non employee [... ticket no employment status new non employee [... eqzibjhw ymebpoih GRP_0 GRP_0 ticket employment status new non employee ente... NaN en
8 unable to disable add ins on outlook unable to disable add ins on outlook mdbegvct dbvichlg GRP_0 GRP_0 unable disable add ins outlook unable disable ... NaN en
9 ticket update on inplant ticket update on inplant fumkcsji sarmtlhy GRP_0 GRP_0 ticket update inplant ticket update inplant NaN en

9.1.3 Exporting the data with Stop words removed and word Lemmatization applied

In [ ]:
df_ML.to_excel("df_ML_StopWords_Lemmatized.xlsx")

10. Data Visualization

10.1 Word Cloud : Most Frequent Words in whole Dataset

In [ ]:
# define wordcloud function from wordcloud library. set some parameteres for beatuful plotting
wc = WordCloud(width = 3000,
    height = 2000,max_words=300,
    background_color = 'black')

# generate word cloud for data
wc_word=wc.generate(str(df_ML.combined_description))
print(wc_word)

# declare our figure 
plt.figure(figsize=(20,10), facecolor='k')

# add title to the graph
plt.title("Most frequent words in dataset", fontsize=20,color='white')
plt.imshow(wc_word)
plt.show()
<wordcloud.wordcloud.WordCloud object at 0x7fda5be03c50>

10.2 Ingram word Analysis

In [ ]:
#ngram function
def ngram_extractor(text, n_gram):
    token = [token for token in text.lower().split(" ") if token != "" if token not in STOPWORDS]
    ngrams = zip(*[token[i:] for i in range(n_gram)])
    return [" ".join(ngram) for ngram in ngrams]

# Function to generate a dataframe with n_gram and top max_row frequencies
def generate_ngrams(df, n_gram, max_row):
    temp_dict = defaultdict(int)
    for question in df:
        for word in ngram_extractor(question, n_gram):
            temp_dict[word] += 1
    temp_df = pd.DataFrame(sorted(temp_dict.items(), key=lambda x: x[1])[::-1]).head(max_row)
    temp_df.columns = ["word", "wordcount"]
    return temp_df
In [ ]:
df_ML.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7860 entries, 0 to 7859
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Short description     7860 non-null   object
 1   Description           7860 non-null   object
 2   Caller                7860 non-null   object
 3   Assignment group      7860 non-null   object
 4   New Assignment Group  7860 non-null   object
 5   combined_description  7860 non-null   object
 6   pred_group            0 non-null      object
 7   Language              7860 non-null   object
dtypes: object(8)
memory usage: 491.4+ KB
In [ ]:
Ticket_desc = df_ML['combined_description']
#Define empty list
ticket_desc_cleaned = []
res = []
#Define for loop to iterate through the elements of the ticket_desc
for l in Ticket_desc:
    #Parse the contents of the cell
    soup = BeautifulSoup(l, 'html.parser')
    #Find all instances of the text within the </p> tag
    for el in soup.find_all('p'):
        res.append(el.get_text())
    #concatenate the strings from the list    
    endstring = ' '.join(map(str, res))
    #reset list
    res = []
    #Append the concatenated string to the main list
    ticket_desc_cleaned.append(endstring)
In [ ]:
ticket_desc_na_cleaned = [item.lower() for item in Ticket_desc]
#remove html links from list 
ticket_desc_na_cleaned =  [re.sub(r"http\S+", "", item) for item in ticket_desc_na_cleaned]
#remove special characters left
ticket_desc_na_cleaned = [re.sub(r"[-()\"#/@;:<>{}`+=~|.!?,]", "", item) for item in ticket_desc_na_cleaned]

#convert to dataframe and rename the column of the ticket_desc_na_cleaned list
ticket_desc_clean = pd.DataFrame(np.array(ticket_desc_na_cleaned).reshape(-1))
ticket_desc_clean.columns = ["ans"]
#Squeeze dataframe to obtain series
desc_cleaned = ticket_desc_clean.squeeze()
In [ ]:
#generate unigram
ans_unigram = generate_ngrams(desc_cleaned, 1, 30)
In [ ]:
ans_unigram.head()
Out[ ]:
word wordcount
0 job 4987
1 please 2211
2 password 2209
3 scheduler 1888
4 erp 1796
In [ ]:
ans_unigram.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   word       30 non-null     object
 1   wordcount  30 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 608.0+ bytes
In [ ]:
#generate barplot for unigram
plt.figure(figsize=(12,8))
sns.barplot(ans_unigram["wordcount"],ans_unigram["word"])
plt.xlabel("Word Count", fontsize=15)
plt.ylabel("Unigrams", fontsize=15)
plt.title("Top 30 Unigrams for Combined Column Translated to English")
plt.show()
In [ ]:
# define wordcloud function from wordcloud library. set some parameteres for beatuful plotting
wc = WordCloud(width = 3000,
    height = 2000,max_words=300,
    background_color = 'black')

# generate word cloud for data
#wc_word=wc.generate(str(ans_unigram))
wc_word=wc.generate(str(ans_unigram['word']))
print(wc_word)

# declare our figure 
plt.figure(figsize=(20,10), facecolor='k')

# add title to the graph
plt.title("Most frequent words in dataset", fontsize=20,color='white')
plt.imshow(wc_word)
plt.show()
<wordcloud.wordcloud.WordCloud object at 0x7fda5d437550>

10.3 Bigram word Analysis

In [ ]:
#generate bigram
ans_bigram = generate_ngrams(desc_cleaned, 2, 20)
In [ ]:
#generate barplot for bigram
plt.figure(figsize=(12,8))
sns.barplot(ans_bigram["wordcount"],ans_bigram["word"])
plt.xlabel("Word Count", fontsize=15)
plt.ylabel("Bigrams", fontsize=15)
plt.title("Top 20 Bigrams for Combined Column Translated to English")
plt.show()
In [ ]:
# define wordcloud function from wordcloud library. set some parameteres for beatuful plotting
wc = WordCloud(width = 3000,
    height = 2000,max_words=300,
    background_color = 'black')

# generate word cloud for data
wc_word=wc.generate(str(ans_bigram['word']))
print(wc_word)

# declare our figure 
plt.figure(figsize=(20,10), facecolor='k')

# add title to the graph
plt.title("Most frequent words in dataset", fontsize=20,color='white')
plt.imshow(wc_word)
plt.show()
<wordcloud.wordcloud.WordCloud object at 0x7fdbabef9bd0>

10.3 Trigram word Analysis

In [ ]:
#generate trigram
ans_trigram = generate_ngrams(desc_cleaned, 3, 20)
In [ ]:
#generate barplot for bigram
plt.figure(figsize=(12,8))
sns.barplot(ans_trigram["wordcount"],ans_trigram["word"])
plt.xlabel("Word Count", fontsize=15)
plt.ylabel("Trigrams", fontsize=15)
plt.title(" Top 20 Trigrams for ticket description")
plt.show()
In [ ]:
# define wordcloud function from wordcloud library. set some parameteres for beatuful plotting
wc = WordCloud(width = 3000,
    height = 2000,max_words=300,
    background_color = 'black')

# generate word cloud for data
wc_word=wc.generate(str(ans_trigram['word']))
print(wc_word)

# declare our figure 
plt.figure(figsize=(20,10), facecolor='k')

# add title to the graph
plt.title("Most frequent words in dataset", fontsize=20,color='white')
plt.imshow(wc_word)
plt.show()
<wordcloud.wordcloud.WordCloud object at 0x7fdbae0cd850>

10.4 Convert Combined text from each sentense to the words. use of simple_process as it tokenize() internally

In [ ]:
#https://radimrehurek.com/gensim/utils.html#gensim.utils.simple_preprocess
def sent_to_words(sentences):
    for sentence in sentences:
        yield(gensim.utils.simple_preprocess(str(sentence), deacc=True))  # deacc=True removes punctuations
In [ ]:
data_words = list(sent_to_words(df_ML['combined_description']))
In [ ]:
print(len(data_words))
7860
In [ ]:
# Build the bigram and trigram models
#https://radimrehurek.com/gensim/models/phrases.html
bigram = gensim.models.Phrases(data_words, min_count=5, threshold=100) # higher threshold fewer phrases.
trigram = gensim.models.Phrases(bigram[data_words], threshold=100)  

# Faster way to get a sentence clubbed as a trigram/bigram
bigram_mod = gensim.models.phrases.Phraser(bigram)
trigram_mod = gensim.models.phrases.Phraser(trigram)
In [ ]:
print(bigram_mod[data_words[1]])
['outlook', 'team', 'meeting', 'skype', 'meeting', 'etc', 'appear', 'outlook', 'calendar', 'somebody', 'please', 'advise', 'correct', 'kind']
In [ ]:
print(trigram_mod[data_words[1]])
['outlook', 'team', 'meeting', 'skype', 'meeting', 'etc', 'appear', 'outlook', 'calendar', 'somebody', 'please', 'advise', 'correct', 'kind']
In [ ]:
def make_trigrams(texts):
    return [trigram_mod[bigram_mod[doc]] for doc in texts]
In [ ]:
# Form Bigrams
data_words_trigrams = make_trigrams(data_words)
In [ ]:
wordclouds=' '.join(map(str, data_words_trigrams))
In [ ]:
wordCloudfinal = wordclouds.replace('\', \'',' ').replace(']',' ').replace('[',' ').replace('\'','').replace('   ',' ').replace('*','')
In [ ]:
wordcloud = WordCloud(width=480, height=480, max_font_size=20, min_font_size=10).generate(wordCloudfinal)
plt.figure(figsize=(20,10))
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
plt.margins(x=0, y=0)
plt.show()
In [ ]:
new_df = df_ML.copy()
new_df['words'] = data_words_trigrams
new_df.head()
Out[ ]:
Short description Description Caller Assignment group New Assignment Group combined_description pred_group Language words
0 login issue verified user details. employee and manager na... spxjnwir pjlcoqds GRP_0 GRP_0 login issue verify user detail employee manage... NaN en [login, issue, verify, user, detail, employee,...
1 outlook team my meetings/skype meetings etc are not ap... hmjdrvpb komuaywn GRP_0 GRP_0 outlook team meeting skype meeting etc appear ... NaN en [outlook, team, meeting, skype, meeting, etc, ...
2 cant log in to vpn hi i cannot log on to vpn best eylqgodm ybqkwiam GRP_0 GRP_0 can not log vpn hi can not log vpn best NaN en [can_not, log, vpn, hi, can_not, log, vpn, best]
3 unable to access hr tool page unable to access hr tool page xbkucsvz gcpydteq GRP_0 GRP_0 unable access hr tool page unable access hr to... NaN en [unable, access, hr, tool, page, unable, acces...
4 skype error skype error owlgqjme qhcozdfx GRP_0 GRP_0 skype error skype error NaN no [skype, error, skype, error]
In [ ]:
sortedListOfGroup = df_ML['New Assignment Group'].value_counts().sort_values(ascending=False).index
sortedListOfGroup
Out[ ]:
Index(['GRP_0', 'GRP_8', 'GRP_24', 'GRP_9', 'GRP_12', 'GRP_19', 'GRP_2',
       'GRP_3', 'GRP_6', 'GRP_13', 'GRP_10', 'GRP_5', 'GRP_14', 'GRP_25',
       'GRP_33', 'GRP_29', 'GRP_4', 'GRP_18', 'GRP_99', 'GRP_16', 'GRP_31',
       'GRP_34', 'GRP_26', 'GRP_28', 'GRP_40', 'GRP_41', 'GRP_30', 'GRP_42',
       'GRP_15', 'GRP_45', 'GRP_20', 'GRP_1', 'GRP_11', 'GRP_47', 'GRP_22',
       'GRP_62', 'GRP_48', 'GRP_7', 'GRP_17', 'GRP_39', 'GRP_27', 'GRP_60',
       'GRP_44', 'GRP_37', 'GRP_50', 'GRP_21', 'GRP_36', 'GRP_53'],
      dtype='object')
In [ ]:
def wordcloud_grp(f, x):
    wordclouds_0=' '.join(map(str, f))

    wc = WordCloud(width=480, height=480, max_font_size=20, min_font_size=10, max_words=50).generate(wordclouds_0.replace('\'',''))
    plt.figure(figsize=(20,10))
    plt.imshow(wc, interpolation="bilinear")
    plt.axis("off")
    plt.title("Most common 50 words of {}".format(x))
    plt.margins(x=0, y=0)
    plt.show()

10.5 Word Cloud of Most common 50 words in Top 10 Groups based on count.

In [ ]:
for i in range(10):

    Grp = new_df[new_df ['New Assignment Group'] == sortedListOfGroup[i]]
    Grp = Grp['words']
    wordcloud_grp(Grp,sortedListOfGroup[i])
In [ ]:
df_ML.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7860 entries, 0 to 7859
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Short description     7860 non-null   object
 1   Description           7860 non-null   object
 2   Caller                7860 non-null   object
 3   Assignment group      7860 non-null   object
 4   New Assignment Group  7860 non-null   object
 5   combined_description  7860 non-null   object
 6   pred_group            0 non-null      object
 7   Language              7860 non-null   object
dtypes: object(8)
memory usage: 491.4+ KB
In [ ]:
df_DL.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7860 entries, 0 to 7859
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Short description     7860 non-null   object
 1   Description           7860 non-null   object
 2   Caller                7860 non-null   object
 3   Assignment group      7860 non-null   object
 4   New Assignment Group  7860 non-null   object
 5   combined_description  7860 non-null   object
 6   pred_group            0 non-null      object
 7   Language              7860 non-null   object
dtypes: object(8)
memory usage: 491.4+ KB

Making copies of the datasets for different steps

In [ ]:
df_ML_Aug = df_ML.copy()
df_ML_NonAug = df_ML.copy()
df_DL_Aug = df_DL.copy()
df_DL_NonAug = df_DL.copy()
In [ ]:
df_ML_Aug.shape, df_ML_NonAug.shape, df_DL_Aug.shape,df_DL_NonAug.shape
Out[ ]:
((7860, 8), (7860, 8), (7860, 8), (7860, 8))
In [ ]:
df_ML_Aug.to_excel("df_ML_Aug.xlsx")
df_ML_NonAug.to_excel("df_ML_NonAug.xlsx")
df_DL_Aug.to_excel("df_DL_Aug.xlsx")
df_DL_NonAug.to_excel("df_DL_NonAug.xlsx")

11. Modeling - ML Model

11.1 ML Model without upsampling or augmentation

Here we maintain the classes as per their original proportion and try constructing the model to see how the performance looks

In [ ]:
df_ML_NonAug.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7860 entries, 0 to 7859
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Short description     7860 non-null   object
 1   Description           7860 non-null   object
 2   Caller                7860 non-null   object
 3   Assignment group      7860 non-null   object
 4   New Assignment Group  7860 non-null   object
 5   combined_description  7860 non-null   object
 6   pred_group            0 non-null      object
 7   Language              7860 non-null   object
dtypes: object(8)
memory usage: 491.4+ KB
In [ ]:
df_ML_NonAug.head()
Out[ ]:
Short description Description Caller Assignment group New Assignment Group combined_description pred_group Language
0 login issue verified user details. employee and manager na... spxjnwir pjlcoqds GRP_0 GRP_0 login issue verify user detail employee manage... NaN en
1 outlook team my meetings/skype meetings etc are not ap... hmjdrvpb komuaywn GRP_0 GRP_0 outlook team meeting skype meeting etc appear ... NaN en
2 cant log in to vpn hi i cannot log on to vpn best eylqgodm ybqkwiam GRP_0 GRP_0 can not log vpn hi can not log vpn best NaN en
3 unable to access hr tool page unable to access hr tool page xbkucsvz gcpydteq GRP_0 GRP_0 unable access hr tool page unable access hr to... NaN en
4 skype error skype error owlgqjme qhcozdfx GRP_0 GRP_0 skype error skype error NaN no
In [ ]:
df_ML_NonAug['target'] = df_ML_NonAug['Assignment group'].astype('category').cat.codes
In [ ]:
df_ML_NonAug.groupby(["Assignment group", "target"]).size()
Out[ ]:
Assignment group  target
GRP_0             0         3660
GRP_1             1           31
GRP_10            2          139
GRP_11            3           30
GRP_12            4          251
GRP_13            5          143
GRP_14            6          116
GRP_15            7           36
GRP_16            8           84
GRP_17            9           19
GRP_18            10          88
GRP_19            11         213
GRP_2             12         209
GRP_20            13          33
GRP_21            14          13
GRP_22            15          27
GRP_23            16           1
GRP_24            17         285
GRP_25            18         115
GRP_26            19          55
GRP_27            20          17
GRP_28            21          44
GRP_29            22          97
GRP_3             23         197
GRP_30            24          39
GRP_31            25          67
GRP_32            26           4
GRP_33            27         103
GRP_34            28          62
GRP_35            29           1
GRP_36            30          11
GRP_37            31          15
GRP_38            32           3
GRP_39            33          19
GRP_4             34          95
GRP_40            35          40
GRP_41            36          40
GRP_42            37          37
GRP_43            38           5
GRP_44            39          15
GRP_45            40          35
GRP_46            41           6
GRP_47            42          27
GRP_48            43          25
GRP_49            44           6
GRP_5             45         128
GRP_50            46          14
GRP_51            47           8
GRP_52            48           9
GRP_53            49          11
GRP_54            50           2
GRP_55            51           8
GRP_56            52           3
GRP_57            53           2
GRP_58            54           1
GRP_59            55           6
GRP_6             56         182
GRP_60            57          16
GRP_61            58           1
GRP_62            59          25
GRP_63            60           3
GRP_64            61           1
GRP_65            62           6
GRP_66            63           1
GRP_67            64           1
GRP_68            65           3
GRP_69            66           2
GRP_7             67          21
GRP_70            68           1
GRP_71            69           2
GRP_72            70           2
GRP_8             71         591
GRP_9             72         252
dtype: int64
In [ ]:
# Create training and test datasets with 80:20 ratio
X_train, X_test, y_train, y_test = train_test_split(df_ML_NonAug.combined_description  , 
                                                    df_ML_NonAug.target, 
                                                    test_size=0.20, 
                                                    random_state=42)
print('\033[1mShape of the training set:\033[0m', X_train.shape, y_train.shape)
print('\033[1mShape of the test set:\033[0m', X_test.shape, y_test.shape)
Shape of the training set: (6288,) (6288,)
Shape of the test set: (1572,) (1572,)
In [ ]:
def fit_n_print(model, X_train, X_test, y_train, y_test):  # take the model, train data and test data as input
    
    start = time.time()  # note the start time 

    clf = model
    clf = Pipeline([('vect', CountVectorizer()),
                     ('tfidf', TfidfTransformer()),
                     ('model', model),
                     ])

    clf.fit(X_train, y_train)   # fit the model using the train data

    pred_train=clf.predict(X_train) # model predictions on the training data
    y_pred = clf.predict(X_test)   # model predictions on the test data
    
    accuracy_training = (accuracy_score(y_train,pred_train))*100 #accurancy on training
    accuracy_test= (accuracy_score(y_test,y_pred ))*100  #accuracy on test
    

    recallscore_training =(recall_score(y_train,pred_train,average='weighted'))*100  #recall on training
    recallscore_test = (recall_score(y_test,y_pred, average='weighted'))*100  #recall on test
    
    precision_training = (precision_score(y_train,pred_train, average='weighted'))*100
    precision_test = (precision_score(y_test,y_pred,average='weighted'))*100
    
    f1score_training = (f1_score(y_train,pred_train, average='weighted'))*100
    f1score_test = (f1_score(y_test,y_pred, average='weighted'))*100
        
    end = time.time()  #note the end time
       
    duration = end - start  # calculate the total duration

    print('Algorithm:', type(model).__name__)
    print("\n Classification report:\n", classification_report(y_test, y_pred))
    print("\n Confusion report:\n", confusion_matrix(y_test, y_pred))
    print("Accuracy Score:", accuracy_score(y_test, y_pred))
    print()
    print("\n \n")
     
    

    return  accuracy_training,accuracy_test,recallscore_training, recallscore_test, precision_training,precision_test,f1score_training, f1score_test,  duration, y_pred  # return all the metrics along with predictions
In [ ]:
import time
In [ ]:
rf  = RandomForestClassifier()
xgb = XGBClassifier()
SVC = LinearSVC()
KNN = KNeighborsClassifier()
NB = MultinomialNB()

result = {}   # Create an empty dictionary to later use to store metrics of each of the models

for model, name  in zip([rf,xgb, SVC,KNN,NB], 
                         ['Random Forest', 'Xgboost', 'SVC','KNN','Naive Bayes']):
    result[name] = fit_n_print(model,X_train, X_test, y_train, y_test)
Algorithm: RandomForestClassifier

 Classification report:
               precision    recall  f1-score   support

           0       0.64      0.98      0.77       733
           1       1.00      0.20      0.33         5
           2       1.00      0.24      0.39        29
           3       0.00      0.00      0.00         8
           4       0.63      0.51      0.56        61
           5       0.47      0.35      0.40        23
           6       0.75      0.25      0.38        24
           7       0.00      0.00      0.00         8
           8       0.00      0.00      0.00        10
           9       0.57      0.80      0.67         5
          10       0.83      0.22      0.34        23
          11       0.40      0.10      0.16        40
          12       0.59      0.25      0.35        40
          13       1.00      0.17      0.29         6
          14       0.00      0.00      0.00         3
          15       0.00      0.00      0.00         8
          16       0.00      0.00      0.00         1
          17       0.98      0.80      0.88        56
          18       0.71      0.36      0.48        14
          19       0.00      0.00      0.00        14
          20       0.00      0.00      0.00         4
          21       0.00      0.00      0.00        16
          22       0.71      0.24      0.36        21
          23       0.75      0.17      0.28        35
          24       0.00      0.00      0.00         7
          25       0.00      0.00      0.00        11
          27       0.60      0.16      0.25        19
          28       0.50      0.25      0.33         8
          30       1.00      1.00      1.00         1
          31       0.00      0.00      0.00         3
          33       1.00      0.50      0.67         2
          34       0.33      0.05      0.08        21
          35       0.00      0.00      0.00         7
          36       1.00      0.17      0.29         6
          37       0.00      0.00      0.00         6
          38       0.00      0.00      0.00         3
          39       0.00      0.00      0.00         5
          40       0.00      0.00      0.00         7
          41       0.00      0.00      0.00         3
          42       0.00      0.00      0.00         9
          43       0.00      0.00      0.00         4
          44       0.00      0.00      0.00         2
          45       0.74      0.52      0.61        27
          46       0.00      0.00      0.00         2
          47       0.00      0.00      0.00         2
          48       0.00      0.00      0.00         1
          49       0.00      0.00      0.00         1
          51       0.00      0.00      0.00         2
          56       0.82      0.33      0.47        43
          57       0.00      0.00      0.00         4
          59       0.00      0.00      0.00         5
          61       0.00      0.00      0.00         1
          62       0.00      0.00      0.00         1
          66       0.00      0.00      0.00         0
          67       0.00      0.00      0.00         3
          70       0.00      0.00      0.00         1
          71       0.53      0.78      0.63       112
          72       0.37      0.18      0.24        56

    accuracy                           0.63      1572
   macro avg       0.31      0.16      0.19      1572
weighted avg       0.58      0.63      0.55      1572


 Confusion report:
 [[720   0   0 ...   0   0   0]
 [  2   1   0 ...   0   1   0]
 [ 17   0   7 ...   0   3   0]
 ...
 [  1   0   0 ...   0   0   0]
 [  3   0   0 ...   0  87  10]
 [ 15   0   0 ...   0  30  10]]
Accuracy Score: 0.6310432569974554


 

Algorithm: XGBClassifier

 Classification report:
               precision    recall  f1-score   support

           0       0.66      0.96      0.79       733
           1       0.00      0.00      0.00         5
           2       0.77      0.34      0.48        29
           3       0.00      0.00      0.00         8
           4       0.64      0.48      0.55        61
           5       0.48      0.43      0.45        23
           6       0.88      0.29      0.44        24
           7       1.00      0.12      0.22         8
           8       0.62      0.50      0.56        10
           9       0.62      1.00      0.77         5
          10       0.71      0.22      0.33        23
          11       0.47      0.17      0.25        40
          12       0.59      0.33      0.42        40
          13       0.00      0.00      0.00         6
          14       0.00      0.00      0.00         3
          15       0.00      0.00      0.00         8
          16       0.00      0.00      0.00         1
          17       0.98      0.79      0.87        56
          18       0.50      0.57      0.53        14
          19       0.00      0.00      0.00        14
          20       0.00      0.00      0.00         4
          21       0.00      0.00      0.00        16
          22       0.56      0.43      0.49        21
          23       0.80      0.11      0.20        35
          24       0.50      0.14      0.22         7
          25       0.50      0.09      0.15        11
          27       0.33      0.05      0.09        19
          28       0.25      0.12      0.17         8
          30       1.00      1.00      1.00         1
          31       0.00      0.00      0.00         3
          33       1.00      0.50      0.67         2
          34       1.00      0.14      0.25        21
          35       0.00      0.00      0.00         7
          36       0.60      0.50      0.55         6
          37       1.00      0.17      0.29         6
          38       0.00      0.00      0.00         3
          39       0.00      0.00      0.00         5
          40       0.50      0.14      0.22         7
          41       0.00      0.00      0.00         3
          42       0.50      0.22      0.31         9
          43       1.00      0.25      0.40         4
          44       0.00      0.00      0.00         2
          45       0.74      0.52      0.61        27
          46       0.00      0.00      0.00         2
          47       0.00      0.00      0.00         2
          48       0.00      0.00      0.00         1
          49       0.00      0.00      0.00         1
          51       1.00      1.00      1.00         2
          56       0.89      0.40      0.55        43
          57       0.00      0.00      0.00         4
          59       0.00      0.00      0.00         5
          61       0.00      0.00      0.00         1
          62       0.00      0.00      0.00         1
          66       0.00      0.00      0.00         0
          67       0.00      0.00      0.00         3
          70       0.00      0.00      0.00         1
          71       0.54      0.79      0.64       112
          72       0.41      0.21      0.28        56

    accuracy                           0.65      1572
   macro avg       0.38      0.22      0.25      1572
weighted avg       0.61      0.65      0.58      1572


 Confusion report:
 [[707   0   1 ...   0   0   0]
 [  2   0   0 ...   0   1   0]
 [ 12   0  10 ...   0   3   0]
 ...
 [  1   0   0 ...   0   0   0]
 [  5   0   0 ...   0  89  10]
 [ 13   0   0 ...   0  30  12]]
Accuracy Score: 0.6456743002544529


 

Algorithm: LinearSVC

 Classification report:
               precision    recall  f1-score   support

           0       0.76      0.94      0.84       733
           1       0.50      0.40      0.44         5
           2       0.78      0.48      0.60        29
           3       0.67      0.25      0.36         8
           4       0.72      0.59      0.65        61
           5       0.62      0.70      0.65        23
           6       0.67      0.25      0.36        24
           7       0.60      0.38      0.46         8
           8       0.38      0.30      0.33        10
           9       0.50      1.00      0.67         5
          10       0.75      0.52      0.62        23
          11       0.32      0.23      0.26        40
          12       0.47      0.40      0.43        40
          13       0.33      0.17      0.22         6
          14       0.00      0.00      0.00         3
          15       0.00      0.00      0.00         8
          16       0.00      0.00      0.00         1
          17       0.88      0.80      0.84        56
          18       0.50      0.57      0.53        14
          19       0.00      0.00      0.00        14
          20       0.00      0.00      0.00         4
          21       1.00      0.31      0.48        16
          22       0.79      0.71      0.75        21
          23       0.33      0.26      0.29        35
          24       1.00      0.14      0.25         7
          25       0.22      0.18      0.20        11
          27       0.50      0.37      0.42        19
          28       0.80      0.50      0.62         8
          30       0.50      1.00      0.67         1
          31       1.00      0.33      0.50         3
          33       1.00      0.50      0.67         2
          34       0.43      0.14      0.21        21
          35       0.00      0.00      0.00         7
          36       0.40      0.33      0.36         6
          37       0.33      0.17      0.22         6
          38       0.00      0.00      0.00         3
          39       1.00      0.40      0.57         5
          40       0.67      0.29      0.40         7
          41       0.00      0.00      0.00         3
          42       0.50      0.22      0.31         9
          43       1.00      0.50      0.67         4
          44       0.00      0.00      0.00         2
          45       0.70      0.52      0.60        27
          46       0.00      0.00      0.00         2
          47       1.00      0.50      0.67         2
          48       0.00      0.00      0.00         1
          49       0.00      0.00      0.00         1
          51       1.00      0.50      0.67         2
          56       0.91      0.49      0.64        43
          57       0.00      0.00      0.00         4
          59       0.00      0.00      0.00         5
          61       0.00      0.00      0.00         1
          62       0.00      0.00      0.00         1
          67       0.00      0.00      0.00         3
          70       0.00      0.00      0.00         1
          71       0.52      0.81      0.64       112
          72       0.39      0.30      0.34        56

    accuracy                           0.68      1572
   macro avg       0.45      0.31      0.34      1572
weighted avg       0.65      0.68      0.65      1572


 Confusion report:
 [[691   0   0 ...   0   0   3]
 [  0   2   0 ...   0   2   0]
 [  5   0  14 ...   0   3   0]
 ...
 [  1   0   0 ...   0   0   0]
 [  1   0   0 ...   0  91  10]
 [  7   0   0 ...   0  31  17]]
Accuracy Score: 0.683206106870229


 

Algorithm: KNeighborsClassifier

 Classification report:
               precision    recall  f1-score   support

           0       0.65      0.98      0.78       733
           1       0.67      0.40      0.50         5
           2       0.91      0.34      0.50        29
           3       1.00      0.12      0.22         8
           4       0.68      0.41      0.51        61
           5       0.50      0.30      0.38        23
           6       0.62      0.21      0.31        24
           7       0.33      0.12      0.18         8
           8       0.00      0.00      0.00        10
           9       0.56      1.00      0.71         5
          10       0.50      0.17      0.26        23
          11       0.18      0.07      0.11        40
          12       0.50      0.30      0.37        40
          13       0.00      0.00      0.00         6
          14       0.00      0.00      0.00         3
          15       0.00      0.00      0.00         8
          16       0.00      0.00      0.00         1
          17       0.87      0.61      0.72        56
          18       0.71      0.36      0.48        14
          19       0.00      0.00      0.00        14
          20       0.00      0.00      0.00         4
          21       0.50      0.06      0.11        16
          22       0.71      0.24      0.36        21
          23       0.36      0.11      0.17        35
          24       0.00      0.00      0.00         7
          25       0.17      0.09      0.12        11
          27       0.75      0.16      0.26        19
          28       0.50      0.12      0.20         8
          30       1.00      1.00      1.00         1
          31       1.00      0.33      0.50         3
          33       0.50      0.50      0.50         2
          34       0.00      0.00      0.00        21
          35       0.00      0.00      0.00         7
          36       0.00      0.00      0.00         6
          37       0.00      0.00      0.00         6
          38       0.00      0.00      0.00         3
          39       0.00      0.00      0.00         5
          40       0.00      0.00      0.00         7
          41       0.00      0.00      0.00         3
          42       1.00      0.11      0.20         9
          43       0.00      0.00      0.00         4
          44       0.00      0.00      0.00         2
          45       0.65      0.56      0.60        27
          46       0.00      0.00      0.00         2
          47       0.00      0.00      0.00         2
          48       0.00      0.00      0.00         1
          49       0.00      0.00      0.00         1
          51       0.00      0.00      0.00         2
          56       0.76      0.37      0.50        43
          57       0.00      0.00      0.00         4
          59       0.00      0.00      0.00         5
          61       0.00      0.00      0.00         1
          62       0.00      0.00      0.00         1
          67       0.00      0.00      0.00         3
          70       0.00      0.00      0.00         1
          71       0.51      0.84      0.64       112
          72       0.64      0.12      0.21        56

    accuracy                           0.62      1572
   macro avg       0.31      0.18      0.20      1572
weighted avg       0.57      0.62      0.55      1572


 Confusion report:
 [[715   0   0 ...   0   0   1]
 [  0   2   0 ...   0   2   0]
 [ 12   0  10 ...   0   3   0]
 ...
 [  1   0   0 ...   0   0   0]
 [  1   0   0 ...   0  94   1]
 [ 10   0   1 ...   0  37   7]]
Accuracy Score: 0.6234096692111959


 

Algorithm: MultinomialNB

 Classification report:
               precision    recall  f1-score   support

           0       0.56      1.00      0.72       733
           1       0.00      0.00      0.00         5
           2       0.00      0.00      0.00        29
           3       0.00      0.00      0.00         8
           4       0.80      0.20      0.32        61
           5       0.00      0.00      0.00        23
           6       0.00      0.00      0.00        24
           7       0.00      0.00      0.00         8
           8       0.00      0.00      0.00        10
           9       0.00      0.00      0.00         5
          10       0.00      0.00      0.00        23
          11       0.00      0.00      0.00        40
          12       0.40      0.05      0.09        40
          13       0.00      0.00      0.00         6
          14       0.00      0.00      0.00         3
          15       0.00      0.00      0.00         8
          16       0.00      0.00      0.00         1
          17       1.00      0.21      0.35        56
          18       0.00      0.00      0.00        14
          19       0.00      0.00      0.00        14
          20       0.00      0.00      0.00         4
          21       0.00      0.00      0.00        16
          22       0.00      0.00      0.00        21
          23       0.00      0.00      0.00        35
          24       0.00      0.00      0.00         7
          25       0.00      0.00      0.00        11
          27       0.00      0.00      0.00        19
          28       0.00      0.00      0.00         8
          30       0.00      0.00      0.00         1
          31       0.00      0.00      0.00         3
          33       0.00      0.00      0.00         2
          34       0.00      0.00      0.00        21
          35       0.00      0.00      0.00         7
          36       0.00      0.00      0.00         6
          37       0.00      0.00      0.00         6
          38       0.00      0.00      0.00         3
          39       0.00      0.00      0.00         5
          40       0.00      0.00      0.00         7
          41       0.00      0.00      0.00         3
          42       0.00      0.00      0.00         9
          43       0.00      0.00      0.00         4
          44       0.00      0.00      0.00         2
          45       0.00      0.00      0.00        27
          46       0.00      0.00      0.00         2
          47       0.00      0.00      0.00         2
          48       0.00      0.00      0.00         1
          49       0.00      0.00      0.00         1
          51       0.00      0.00      0.00         2
          56       0.00      0.00      0.00        43
          57       0.00      0.00      0.00         4
          59       0.00      0.00      0.00         5
          61       0.00      0.00      0.00         1
          62       0.00      0.00      0.00         1
          67       0.00      0.00      0.00         3
          70       0.00      0.00      0.00         1
          71       0.48      0.96      0.64       112
          72       1.00      0.02      0.04        56

    accuracy                           0.55      1572
   macro avg       0.07      0.04      0.04      1572
weighted avg       0.41      0.55      0.41      1572


 Confusion report:
 [[733   0   0 ...   0   0   0]
 [  4   0   0 ...   0   1   0]
 [ 21   0   0 ...   0   8   0]
 ...
 [  1   0   0 ...   0   0   0]
 [  4   0   0 ...   0 108   0]
 [ 15   0   0 ...   0  40   1]]
Accuracy Score: 0.5521628498727735


 

In [ ]:
result_without_aug = pd.DataFrame(np.array(list(result.values()))[:,:-1],    # make a dataframe out of the metrics from result dictionary 
                       columns= ['accuracy_training','accuracy_test',
                                 'recallscore_training', 'recallscore_test', 
                                 'precision_training','precision_test',
                                 'f1score_training', 'f1score_test', 
                                 'Elapsed'],
                      index= result.keys())   # use the model names as index

result_without_aug.index.name = 'Model'   # name the index of the result1 dataframe as 'Model'

result_without_aug
Out[ ]:
accuracy_training accuracy_test recallscore_training recallscore_test precision_training precision_test f1score_training f1score_test Elapsed
Model
Random Forest 95.0382 63.1043 95.0382 63.1043 95.6395 57.7694 94.9112 55.4781 8.5089
Xgboost 78.1489 64.5674 78.1489 64.5674 81.1785 61.3353 76.0116 58.4029 89.5253
SVC 91.9688 68.3206 91.9688 68.3206 92.6802 65.3473 91.7439 64.8431 0.674098
KNN 69.4338 62.341 69.4338 62.341 71.7148 56.9352 64.3132 54.8338 1.6148
Naive Bayes 55.9001 55.2163 55.9001 55.2163 40.7176 40.6733 41.8322 40.7916 0.2861

Without any class imbalance treatment, we see that the test data accuracy is low, and in some models, we see overfitting too. We will try treating class imbalance through augmentation next

11.2 ML Model with class imbalance treatment via augmentation

  1. Split train test

  2. Augment data

In [ ]:
df_ML_Aug.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7860 entries, 0 to 7859
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Short description     7860 non-null   object
 1   Description           7860 non-null   object
 2   Caller                7860 non-null   object
 3   Assignment group      7860 non-null   object
 4   New Assignment Group  7860 non-null   object
 5   combined_description  7860 non-null   object
 6   pred_group            0 non-null      object
 7   Language              7860 non-null   object
dtypes: object(8)
memory usage: 491.4+ KB
In [ ]:
df_ML_Aug.shape
Out[ ]:
(7860, 8)
In [ ]:
df_ML_Aug.head()
Out[ ]:
Short description Description Caller Assignment group New Assignment Group combined_description pred_group Language
0 login issue verified user details. employee and manager na... spxjnwir pjlcoqds GRP_0 GRP_0 login issue verify user detail employee manage... NaN en
1 outlook team my meetings/skype meetings etc are not ap... hmjdrvpb komuaywn GRP_0 GRP_0 outlook team meeting skype meeting etc appear ... NaN en
2 cant log in to vpn hi i cannot log on to vpn best eylqgodm ybqkwiam GRP_0 GRP_0 can not log vpn hi can not log vpn best NaN en
3 unable to access hr tool page unable to access hr tool page xbkucsvz gcpydteq GRP_0 GRP_0 unable access hr tool page unable access hr to... NaN en
4 skype error skype error owlgqjme qhcozdfx GRP_0 GRP_0 skype error skype error NaN no
In [ ]:
#import nltk
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
Out[ ]:
True
In [ ]:
!pip install nlpaug
Collecting nlpaug
  Downloading nlpaug-1.1.9-py3-none-any.whl (408 kB)
     |████████████████████████████████| 408 kB 2.7 MB/s 
Requirement already satisfied: requests>=2.22.0 in /usr/local/lib/python3.7/dist-packages (from nlpaug) (2.23.0)
Collecting pandas>=1.2.0
  Downloading pandas-1.3.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.3 MB)
     |████████████████████████████████| 11.3 MB 32.0 MB/s 
Requirement already satisfied: numpy>=1.16.2 in /usr/local/lib/python3.7/dist-packages (from nlpaug) (1.19.5)
Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.2.0->nlpaug) (2018.9)
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.2.0->nlpaug) (2.8.2)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas>=1.2.0->nlpaug) (1.15.0)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.22.0->nlpaug) (2.10)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.22.0->nlpaug) (3.0.4)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.22.0->nlpaug) (2021.10.8)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.22.0->nlpaug) (1.24.3)
Installing collected packages: pandas, nlpaug
  Attempting uninstall: pandas
    Found existing installation: pandas 1.1.5
    Uninstalling pandas-1.1.5:
      Successfully uninstalled pandas-1.1.5
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires pandas~=1.1.0; python_version >= "3.0", but you have pandas 1.3.4 which is incompatible.
Successfully installed nlpaug-1.1.9 pandas-1.3.4
In [ ]:
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.sentence as nas
import nlpaug.flow as nafc

from nlpaug.util import Action

aug = naw.SynonymAug(aug_src='wordnet',aug_max=4)
In [ ]:
# Take an example for augmentation
example=aug.augment(X_train[5],n=2)
print('\033[1mOriginal text:\033[0m')
print(X_train[5])
print('_'*100)
print('\033[1mAugmented text:\033[0m')
print(example[0])
print(example[1])
Original text:
unable log engineering tool skype unable log engineering tool skype
____________________________________________________________________________________________________
Augmented text:
ineffectual log technology tool skype ineffectual log engineering tool skype
ineffectual log engineering tool skype unable logarithm engineering tool skype
In [ ]:
augmented_sentences=[]
augmented_sentences_labels=[]
for i in X_train.index:
  if y_train[i] in (24,9,12,2,19,3,6):
    temps=aug.augment(X_train[i],n=3)
    for sent in temps:
      augmented_sentences.append(sent)
      augmented_sentences_labels.append(y_train[i])
  elif y_train[i] in (13,10,5,14,24,31,18,28,4,16,47):    
    temps=aug.augment(X_train[i],n=6)
    for sent in temps:
      augmented_sentences.append(sent)
      augmented_sentences_labels.append(y_train[i])
  elif y_train[i] in (30,32,25,27,37,15,38,29,40,36,11,20,1,42,41,22):    
    temps=aug.augment(X_train[i],n=12)
    for sent in temps:
      augmented_sentences.append(sent)
      augmented_sentences_labels.append(y_train[i])
  elif y_train[i] in (46,7,35,17,26,39,34,45,43,33,21,44):    
    temps=aug.augment(X_train[i],n=24)
    for sent in temps:
      augmented_sentences.append(sent)
      augmented_sentences_labels.append(y_train[i])
In [ ]:
df_ML_Aug['target'] = df_ML_Aug['Assignment group'].astype('category').cat.codes
In [ ]:
# Create training and test datasets with 80:20 ratio without augmenatation
X_train, X_test, y_train, y_test = train_test_split(df_ML_Aug.combined_description, 
                                                    df_ML_Aug.target, 
                                                    test_size=0.20, 
                                                    random_state=42)
print('\033[1mShape of the training set:\033[0m', X_train.shape, y_train.shape)
print('\033[1mShape of the test set:\033[0m', X_test.shape, y_test.shape)
Shape of the training set: (6288,) (6288,)
Shape of the test set: (1572,) (1572,)
In [ ]:
X_train=X_train.append(pd.Series(augmented_sentences),ignore_index=True)
y_train=y_train.append(pd.Series(augmented_sentences_labels),ignore_index=True)
print(X_train.shape)
print(y_train.shape)
(31626,)
(31626,)
In [ ]:
y_train_df = pd. DataFrame(y_train,columns=['target'])
In [ ]:
y_train_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31626 entries, 0 to 31625
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   target  31626 non-null  int64
dtypes: int64(1)
memory usage: 247.2 KB
In [ ]:
#Create Dataset 
#y_train_df_old_nogrp0 = dataset1[dataset1['Assignment group'] != 'GRP_0']

descending_order = y_train_df['target'].value_counts().sort_values(ascending=False).index
plt.subplots(figsize=(22,5))
#add code to rotate the labels
ax=sns.countplot(x='target', data=y_train_df, color='royalblue',order=descending_order)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
plt.tight_layout()
plt.title('Group count after augmentation')
plt.show()
In [ ]:
rf  = RandomForestClassifier()
xgb = XGBClassifier()
SVC = LinearSVC()
KNN = KNeighborsClassifier()
NB = MultinomialNB()

result = {}   # Create an empty dictionary to later use to store metrics of each of the models

for model, name  in zip([rf,xgb, SVC,KNN,NB], 
                         ['Random Forest', 'Xgboost', 'SVC','KNN','Naive Bayes']):
    result[name] = fit_n_print(model,X_train, X_test, y_train, y_test)
Algorithm: RandomForestClassifier

 Classification report:
               precision    recall  f1-score   support

           0       0.68      0.94      0.79       733
           1       0.50      0.20      0.29         5
           2       1.00      0.24      0.39        29
           3       0.00      0.00      0.00         8
           4       0.65      0.54      0.59        61
           5       0.42      0.48      0.45        23
           6       0.67      0.25      0.36        24
           7       0.00      0.00      0.00         8
           8       0.00      0.00      0.00        10
           9       0.62      1.00      0.77         5
          10       0.62      0.22      0.32        23
          11       0.35      0.30      0.32        40
          12       0.55      0.28      0.37        40
          13       1.00      0.17      0.29         6
          14       0.00      0.00      0.00         3
          15       0.00      0.00      0.00         8
          16       0.00      0.00      0.00         1
          17       0.77      0.88      0.82        56
          18       0.83      0.71      0.77        14
          19       0.00      0.00      0.00        14
          20       0.00      0.00      0.00         4
          21       0.80      0.25      0.38        16
          22       0.41      0.52      0.46        21
          23       0.50      0.06      0.10        35
          24       0.00      0.00      0.00         7
          25       0.18      0.18      0.18        11
          26       0.00      0.00      0.00         0
          27       0.80      0.42      0.55        19
          28       0.29      0.25      0.27         8
          30       0.50      1.00      0.67         1
          31       1.00      0.33      0.50         3
          33       1.00      0.50      0.67         2
          34       0.36      0.19      0.25        21
          35       0.00      0.00      0.00         7
          36       1.00      0.50      0.67         6
          37       1.00      0.17      0.29         6
          38       0.00      0.00      0.00         3
          39       1.00      0.40      0.57         5
          40       0.00      0.00      0.00         7
          41       0.00      0.00      0.00         3
          42       0.50      0.22      0.31         9
          43       1.00      0.50      0.67         4
          44       0.00      0.00      0.00         2
          45       0.52      0.59      0.55        27
          46       0.00      0.00      0.00         2
          47       0.00      0.00      0.00         2
          48       0.00      0.00      0.00         1
          49       0.00      0.00      0.00         1
          51       0.00      0.00      0.00         2
          56       0.82      0.21      0.33        43
          57       0.00      0.00      0.00         4
          59       0.00      0.00      0.00         5
          61       0.00      0.00      0.00         1
          62       0.00      0.00      0.00         1
          67       0.00      0.00      0.00         3
          70       0.00      0.00      0.00         1
          71       0.53      0.69      0.60       112
          72       0.38      0.18      0.24        56

    accuracy                           0.64      1572
   macro avg       0.37      0.23      0.25      1572
weighted avg       0.60      0.64      0.58      1572


 Confusion report:
 [[691   0   0 ...   0   0   0]
 [  1   1   0 ...   0   1   0]
 [ 13   0   7 ...   0   3   0]
 ...
 [  1   0   0 ...   0   0   0]
 [  1   0   0 ...   0  77  10]
 [ 14   0   0 ...   0  29  10]]
Accuracy Score: 0.6361323155216285


 

Algorithm: XGBClassifier

 Classification report:
               precision    recall  f1-score   support

           0       0.80      0.77      0.79       733
           1       0.20      0.20      0.20         5
           2       0.69      0.38      0.49        29
           3       0.67      0.25      0.36         8
           4       0.60      0.59      0.60        61
           5       0.38      0.52      0.44        23
           6       0.46      0.25      0.32        24
           7       0.43      0.38      0.40         8
           8       0.57      0.80      0.67        10
           9       0.62      1.00      0.77         5
          10       0.61      0.48      0.54        23
          11       0.18      0.40      0.25        40
          12       0.46      0.42      0.44        40
          13       0.25      0.17      0.20         6
          14       0.00      0.00      0.00         3
          15       1.00      0.12      0.22         8
          16       0.00      0.00      0.00         1
          17       0.55      0.84      0.67        56
          18       0.31      0.79      0.45        14
          19       0.29      0.14      0.19        14
          20       0.00      0.00      0.00         4
          21       0.27      0.19      0.22        16
          22       0.56      0.71      0.63        21
          23       0.50      0.03      0.05        35
          24       0.50      0.14      0.22         7
          25       0.12      0.27      0.17        11
          27       0.50      0.58      0.54        19
          28       0.33      0.50      0.40         8
          30       0.20      1.00      0.33         1
          31       0.00      0.00      0.00         3
          33       0.33      0.50      0.40         2
          34       0.20      0.43      0.28        21
          35       0.12      0.29      0.17         7
          36       0.57      0.67      0.62         6
          37       0.33      0.17      0.22         6
          38       0.00      0.00      0.00         3
          39       0.00      0.00      0.00         5
          40       0.40      0.29      0.33         7
          41       0.00      0.00      0.00         3
          42       0.33      0.22      0.27         9
          43       0.29      0.50      0.36         4
          44       0.00      0.00      0.00         2
          45       0.34      0.59      0.43        27
          46       0.50      0.50      0.50         2
          47       0.00      0.00      0.00         2
          48       0.00      0.00      0.00         1
          49       0.00      0.00      0.00         1
          51       1.00      1.00      1.00         2
          56       1.00      0.30      0.46        43
          57       0.00      0.00      0.00         4
          59       0.00      0.00      0.00         5
          61       0.00      0.00      0.00         1
          62       0.00      0.00      0.00         1
          66       0.00      0.00      0.00         0
          67       0.00      0.00      0.00         3
          70       0.00      0.00      0.00         1
          71       0.50      0.58      0.54       112
          72       0.44      0.25      0.32        56

    accuracy                           0.59      1572
   macro avg       0.32      0.31      0.28      1572
weighted avg       0.62      0.59      0.58      1572


 Confusion report:
 [[567   0   2 ...   0   0   1]
 [  0   1   0 ...   0   0   0]
 [  7   0  11 ...   0   3   0]
 ...
 [  1   0   0 ...   0   0   0]
 [  2   0   0 ...   0  65  10]
 [  4   0   0 ...   0  29  14]]
Accuracy Score: 0.5916030534351145


 

Algorithm: LinearSVC

 Classification report:
               precision    recall  f1-score   support

           0       0.80      0.86      0.83       733
           1       0.40      0.40      0.40         5
           2       0.62      0.45      0.52        29
           3       0.67      0.25      0.36         8
           4       0.59      0.61      0.60        61
           5       0.48      0.70      0.57        23
           6       0.55      0.25      0.34        24
           7       0.70      0.88      0.78         8
           8       0.45      0.50      0.48        10
           9       0.45      1.00      0.62         5
          10       0.58      0.65      0.61        23
          11       0.25      0.33      0.28        40
          12       0.45      0.42      0.44        40
          13       0.25      0.17      0.20         6
          14       0.00      0.00      0.00         3
          15       0.00      0.00      0.00         8
          16       0.00      0.00      0.00         1
          17       0.81      0.84      0.82        56
          18       0.31      0.57      0.40        14
          19       0.25      0.07      0.11        14
          20       0.00      0.00      0.00         4
          21       0.40      0.38      0.39        16
          22       0.75      0.71      0.73        21
          23       0.40      0.17      0.24        35
          24       0.50      0.14      0.22         7
          25       0.16      0.27      0.20        11
          27       0.39      0.37      0.38        19
          28       0.50      0.50      0.50         8
          30       0.50      1.00      0.67         1
          31       1.00      0.33      0.50         3
          33       1.00      0.50      0.67         2
          34       0.17      0.24      0.20        21
          35       0.12      0.14      0.13         7
          36       0.67      0.33      0.44         6
          37       0.17      0.17      0.17         6
          38       0.00      0.00      0.00         3
          39       0.67      0.40      0.50         5
          40       0.50      0.43      0.46         7
          41       0.00      0.00      0.00         3
          42       0.50      0.33      0.40         9
          43       0.67      0.50      0.57         4
          44       0.00      0.00      0.00         2
          45       0.42      0.59      0.49        27
          46       0.50      0.50      0.50         2
          47       1.00      0.50      0.67         2
          48       0.00      0.00      0.00         1
          49       0.00      0.00      0.00         1
          51       1.00      0.50      0.67         2
          56       1.00      0.30      0.46        43
          57       0.00      0.00      0.00         4
          59       0.00      0.00      0.00         5
          61       0.00      0.00      0.00         1
          62       0.00      0.00      0.00         1
          67       0.00      0.00      0.00         3
          70       0.00      0.00      0.00         1
          71       0.49      0.61      0.54       112
          72       0.40      0.29      0.33        56

    accuracy                           0.64      1572
   macro avg       0.39      0.34      0.34      1572
weighted avg       0.63      0.64      0.62      1572


 Confusion report:
 [[632   0   3 ...   0   0   3]
 [  0   2   0 ...   0   2   0]
 [  4   0  13 ...   0   3   0]
 ...
 [  1   0   0 ...   0   0   0]
 [  0   0   0 ...   0  68  10]
 [  7   0   0 ...   0  30  16]]
Accuracy Score: 0.6405852417302799


 

Algorithm: KNeighborsClassifier

 Classification report:
               precision    recall  f1-score   support

           0       0.76      0.84      0.80       733
           1       0.33      0.60      0.43         5
           2       0.86      0.41      0.56        29
           3       0.40      0.25      0.31         8
           4       0.58      0.56      0.57        61
           5       0.56      0.65      0.60        23
           6       0.55      0.25      0.34        24
           7       0.22      0.25      0.24         8
           8       0.00      0.00      0.00        10
           9       0.56      1.00      0.71         5
          10       0.46      0.57      0.51        23
          11       0.20      0.28      0.23        40
          12       0.40      0.42      0.41        40
          13       0.25      0.17      0.20         6
          14       0.00      0.00      0.00         3
          15       0.25      0.12      0.17         8
          16       0.00      0.00      0.00         1
          17       0.69      0.68      0.68        56
          18       0.32      0.50      0.39        14
          19       0.00      0.00      0.00        14
          20       0.00      0.00      0.00         4
          21       0.19      0.19      0.19        16
          22       0.55      0.57      0.56        21
          23       0.38      0.09      0.14        35
          24       0.50      0.29      0.36         7
          25       0.04      0.09      0.05        11
          27       0.41      0.37      0.39        19
          28       0.44      0.50      0.47         8
          30       0.25      1.00      0.40         1
          31       1.00      0.33      0.50         3
          32       0.00      0.00      0.00         0
          33       1.00      0.50      0.67         2
          34       0.12      0.19      0.15        21
          35       0.00      0.00      0.00         7
          36       0.43      0.50      0.46         6
          37       0.00      0.00      0.00         6
          38       0.00      0.00      0.00         3
          39       1.00      0.80      0.89         5
          40       0.75      0.43      0.55         7
          41       0.00      0.00      0.00         3
          42       1.00      0.22      0.36         9
          43       0.50      0.25      0.33         4
          44       0.00      0.00      0.00         2
          45       0.47      0.59      0.52        27
          46       0.25      0.50      0.33         2
          47       1.00      0.50      0.67         2
          48       0.00      0.00      0.00         1
          49       0.00      0.00      0.00         1
          51       0.00      0.00      0.00         2
          56       0.38      0.35      0.37        43
          57       0.00      0.00      0.00         4
          59       0.00      0.00      0.00         5
          61       0.00      0.00      0.00         1
          62       0.00      0.00      0.00         1
          67       0.00      0.00      0.00         3
          70       0.00      0.00      0.00         1
          71       0.52      0.65      0.58       112
          72       0.45      0.09      0.15        56

    accuracy                           0.60      1572
   macro avg       0.33      0.29      0.28      1572
weighted avg       0.59      0.60      0.58      1572


 Confusion report:
 [[614   0   0 ...   0   0   1]
 [  1   3   0 ...   0   1   0]
 [  2   0  12 ...   0   3   0]
 ...
 [  1   0   0 ...   0   0   0]
 [  0   0   0 ...   0  73   1]
 [  8   0   0 ...   0  29   5]]
Accuracy Score: 0.6005089058524173


 

Algorithm: MultinomialNB

 Classification report:
               precision    recall  f1-score   support

           0       0.71      0.91      0.80       733
           1       0.00      0.00      0.00         5
           2       1.00      0.17      0.29        29
           3       0.00      0.00      0.00         8
           4       0.58      0.56      0.57        61
           5       0.62      0.70      0.65        23
           6       1.00      0.08      0.15        24
           7       0.33      0.12      0.18         8
           8       0.00      0.00      0.00        10
           9       0.00      0.00      0.00         5
          10       0.75      0.13      0.22        23
          11       0.26      0.53      0.34        40
          12       0.61      0.35      0.44        40
          13       0.00      0.00      0.00         6
          14       0.00      0.00      0.00         3
          15       0.00      0.00      0.00         8
          16       0.00      0.00      0.00         1
          17       0.58      0.88      0.70        56
          18       0.60      0.21      0.32        14
          19       0.00      0.00      0.00        14
          20       0.00      0.00      0.00         4
          21       0.29      0.12      0.17        16
          22       0.29      0.67      0.41        21
          23       0.00      0.00      0.00        35
          24       0.00      0.00      0.00         7
          25       0.00      0.00      0.00        11
          27       0.56      0.26      0.36        19
          28       0.00      0.00      0.00         8
          30       0.00      0.00      0.00         1
          31       0.00      0.00      0.00         3
          33       1.00      0.50      0.67         2
          34       0.27      0.76      0.40        21
          35       0.00      0.00      0.00         7
          36       1.00      0.17      0.29         6
          37       0.00      0.00      0.00         6
          38       0.00      0.00      0.00         3
          39       1.00      0.20      0.33         5
          40       0.00      0.00      0.00         7
          41       0.00      0.00      0.00         3
          42       1.00      0.11      0.20         9
          43       1.00      0.50      0.67         4
          44       0.00      0.00      0.00         2
          45       0.12      0.81      0.20        27
          46       0.00      0.00      0.00         2
          47       0.00      0.00      0.00         2
          48       0.00      0.00      0.00         1
          49       0.00      0.00      0.00         1
          51       0.00      0.00      0.00         2
          56       0.00      0.00      0.00        43
          57       0.00      0.00      0.00         4
          59       0.00      0.00      0.00         5
          61       0.00      0.00      0.00         1
          62       0.00      0.00      0.00         1
          67       0.00      0.00      0.00         3
          70       0.00      0.00      0.00         1
          71       1.00      0.03      0.05       112
          72       0.00      0.00      0.00        56

    accuracy                           0.56      1572
   macro avg       0.26      0.15      0.15      1572
weighted avg       0.56      0.56      0.49      1572


 Confusion report:
 [[670   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 [  8   0   5 ...   0   0   0]
 ...
 [  1   0   0 ...   0   0   0]
 [  0   0   0 ...   0   3   0]
 [ 15   0   0 ...   0   0   0]]
Accuracy Score: 0.5636132315521628


 

In [ ]:
result_with_aug = pd.DataFrame(np.array(list(result.values()))[:,:-1],    # make a dataframe out of the metrics from result dictionary 
                       columns= ['accuracy_training','accuracy_test',
                                 'recallscore_training', 'recallscore_test', 
                                 'precision_training','precision_test',
                                 'f1score_training', 'f1score_test', 
                                 'Elapsed'],
                      index= result.keys())   # use the model names as index

result_with_aug.index.name = 'Model'   # name the index of the result1 dataframe as 'Model'

result_with_aug
Out[ ]:
accuracy_training accuracy_test recallscore_training recallscore_test precision_training precision_test f1score_training f1score_test Elapsed
Model
Random Forest 98.0712 63.6132 98.0712 63.6132 98.2496 59.5113 98.0557 57.8908 49.6436
Xgboost 91.3584 59.1603 91.3584 59.1603 91.6327 61.9264 91.1312 58.4391 328.05
SVC 97.1131 64.0585 97.1131 64.0585 97.2605 63.4746 97.0694 62.493 3.32892
KNN 95.4499 60.0509 95.4499 60.0509 95.3782 58.9458 94.9778 58.1015 20.2341
Naive Bayes 78.8781 56.3613 78.8781 56.3613 81.85 56.1741 75.9759 49.2487 1.21457

even after augmentation, we see test data accuracy is low and there's high evidence of overfitting. We will try treating class imbalance through upsampling/resampling next

11.3 ML Model with class imbalance treated via upsampling/resampling

In [ ]:
df_ML_US =df_ML_Aug.copy()
In [ ]:
df_ML_US.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7860 entries, 0 to 7859
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Short description     7860 non-null   object
 1   Description           7860 non-null   object
 2   Caller                7860 non-null   object
 3   Assignment group      7860 non-null   object
 4   New Assignment Group  7860 non-null   object
 5   combined_description  7860 non-null   object
 6   pred_group            0 non-null      object
 7   Language              7860 non-null   object
 8   target                7860 non-null   int8  
dtypes: int8(1), object(8)
memory usage: 499.1+ KB
In [ ]:
df_ML_US['target'] = df_ML_US['Assignment group'].astype('category').cat.codes
In [ ]:
#Create Dataset for 'others' i.e all groups which is not part of GRP_0
df_ML_US_nogrp0 = df_ML_US[df_ML_US['New Assignment Group'] != 'GRP_0']

descending_order = df_ML_US_nogrp0['New Assignment Group'].value_counts().sort_values(ascending=False).index
plt.subplots(figsize=(22,5))
#add code to rotate the labels
ax=sns.countplot(x='New Assignment Group', data=df_ML_US_nogrp0, color='royalblue',order=descending_order)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
plt.tight_layout()
plt.show()
In [ ]:
maxcount = df_ML_US_nogrp0['New Assignment Group'].value_counts().max()
maxcount
Out[ ]:
591
In [ ]:
df_ML_US_grp0 = df_ML_US[df_ML_US['New Assignment Group'] == 'GRP_0']
In [ ]:
df_ML_US_grp0.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 3660 entries, 0 to 7857
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Short description     3660 non-null   object
 1   Description           3660 non-null   object
 2   Caller                3660 non-null   object
 3   Assignment group      3660 non-null   object
 4   New Assignment Group  3660 non-null   object
 5   combined_description  3660 non-null   object
 6   pred_group            0 non-null      object
 7   Language              3660 non-null   object
 8   target                3660 non-null   int8  
dtypes: int8(1), object(8)
memory usage: 260.9+ KB
In [ ]:
# Treat the imbalance in the dataset by resampling to 591

df_ML_US_nogrp0_upsampled = df_ML_US_nogrp0[0:0]
for grp in df_ML_US_nogrp0['New Assignment Group'].unique():
    df_ML_US_nogrp0_grp = df_ML_US_nogrp0[df_ML_US_nogrp0['New Assignment Group'] == grp]
    resampled = resample(df_ML_US_nogrp0_grp, replace=True, n_samples=int(maxcount), random_state=SEED)
    df_ML_US_nogrp0_upsampled = df_ML_US_nogrp0_upsampled.append(resampled)

dataset_ML_upsampled=pd.concat([df_ML_US_nogrp0_upsampled, df_ML_US_grp0],ignore_index=True)
descending_order = dataset_ML_upsampled['New Assignment Group'].value_counts().sort_values(ascending=False).index
plt.subplots(figsize=(22,5))
#add code to rotate the labels
ax=sns.countplot(x='New Assignment Group', data=dataset_ML_upsampled, color='royalblue')
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
plt.tight_layout()
plt.show()
In [ ]:
# Create a column for target (assignment group) variable
dataset_ML_upsampled['target'] = dataset_ML_upsampled['New Assignment Group'].str[4:]
dataset_ML_upsampled['target'] = dataset_ML_upsampled['target'].astype('category').cat.codes

dataset_ML_upsampled.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31437 entries, 0 to 31436
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Short description     31437 non-null  object
 1   Description           31437 non-null  object
 2   Caller                31437 non-null  object
 3   Assignment group      31437 non-null  object
 4   New Assignment Group  31437 non-null  object
 5   combined_description  31437 non-null  object
 6   pred_group            0 non-null      object
 7   Language              31437 non-null  object
 8   target                31437 non-null  int8  
dtypes: int8(1), object(8)
memory usage: 1.9+ MB
In [ ]:
dataset_ML_upsampled.groupby(["New Assignment Group", "target"]).size()
Out[ ]:
New Assignment Group  target
GRP_0                 0         3660
GRP_1                 1          591
GRP_10                2          591
GRP_11                3          591
GRP_12                4          591
GRP_13                5          591
GRP_14                6          591
GRP_15                7          591
GRP_16                8          591
GRP_17                9          591
GRP_18                10         591
GRP_19                11         591
GRP_2                 12         591
GRP_20                13         591
GRP_21                14         591
GRP_22                15         591
GRP_24                16         591
GRP_25                17         591
GRP_26                18         591
GRP_27                19         591
GRP_28                20         591
GRP_29                21         591
GRP_3                 22         591
GRP_30                23         591
GRP_31                24         591
GRP_33                25         591
GRP_34                26         591
GRP_36                27         591
GRP_37                28         591
GRP_39                29         591
GRP_4                 30         591
GRP_40                31         591
GRP_41                32         591
GRP_42                33         591
GRP_44                34         591
GRP_45                35         591
GRP_47                36         591
GRP_48                37         591
GRP_5                 38         591
GRP_50                39         591
GRP_53                40         591
GRP_6                 41         591
GRP_60                42         591
GRP_62                43         591
GRP_7                 44         591
GRP_8                 45         591
GRP_9                 46         591
GRP_99                47         591
dtype: int64
In [ ]:
dataset_ML_upsampled.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31437 entries, 0 to 31436
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Short description     31437 non-null  object
 1   Description           31437 non-null  object
 2   Caller                31437 non-null  object
 3   Assignment group      31437 non-null  object
 4   New Assignment Group  31437 non-null  object
 5   combined_description  31437 non-null  object
 6   pred_group            0 non-null      object
 7   Language              31437 non-null  object
 8   target                31437 non-null  int8  
dtypes: int8(1), object(8)
memory usage: 1.9+ MB
In [ ]:
dataset_ML_upsampled.to_excel("dataset_ML_upsampled.xlsx")
In [ ]:
# Create training and test datasets with 80:20 ratio without augmenatation
X_train, X_test, y_train, y_test = train_test_split(dataset_ML_upsampled.combined_description, 
                                                    dataset_ML_upsampled.target, 
                                                    test_size=0.20, 
                                                    random_state=42)
print('\033[1mShape of the training set:\033[0m', X_train.shape, y_train.shape)
print('\033[1mShape of the test set:\033[0m', X_test.shape, y_test.shape)
Shape of the training set: (25149,) (25149,)
Shape of the test set: (6288,) (6288,)
In [ ]:
X_train.dtypes, X_test.dtypes, y_train.dtypes, y_test.dtypes
Out[ ]:
(dtype('O'), dtype('O'), dtype('int8'), dtype('int8'))
In [ ]:
rf  = RandomForestClassifier()
xgb = XGBClassifier()
SVC = LinearSVC()
KNN = KNeighborsClassifier()
NB = MultinomialNB()

result = {}   # Create an empty dictionary to later use to store metrics of each of the models

for model, name  in zip([rf,xgb, SVC,KNN,NB], 
                         ['Random Forest', 'Xgboost', 'SVC','KNN','Naive Bayes']):
    result[name] = fit_n_print(model,X_train, X_test, y_train, y_test)
Algorithm: RandomForestClassifier

 Classification report:
               precision    recall  f1-score   support

           0       0.93      0.95      0.94       682
           1       0.87      1.00      0.93       105
           2       0.98      0.88      0.93       117
           3       1.00      1.00      1.00       126
           4       0.98      0.96      0.97       125
           5       0.99      0.95      0.97       117
           6       1.00      1.00      1.00       114
           7       1.00      1.00      1.00       113
           8       0.98      1.00      0.99       127
           9       0.99      1.00      1.00       106
          10       0.98      0.98      0.98       125
          11       0.97      0.87      0.92       131
          12       0.97      0.98      0.97       115
          13       1.00      1.00      1.00       115
          14       1.00      1.00      1.00       118
          15       1.00      1.00      1.00       125
          16       0.99      0.96      0.98       130
          17       0.98      0.99      0.98       130
          18       1.00      1.00      1.00       110
          19       0.99      1.00      1.00       129
          20       1.00      1.00      1.00       127
          21       0.98      0.98      0.98       127
          22       0.95      0.88      0.91       107
          23       0.99      1.00      1.00       102
          24       0.99      0.97      0.98       115
          25       1.00      0.99      1.00       101
          26       1.00      0.99      1.00       112
          27       0.98      1.00      0.99       115
          28       1.00      1.00      1.00       129
          29       0.99      1.00      1.00       124
          30       0.97      1.00      0.98       120
          31       1.00      1.00      1.00       123
          32       0.97      1.00      0.98       131
          33       0.98      1.00      0.99       121
          34       1.00      0.97      0.98       118
          35       1.00      0.81      0.90       113
          36       0.75      0.96      0.84       122
          37       1.00      1.00      1.00       120
          38       0.92      0.67      0.78       109
          39       0.95      1.00      0.98       123
          40       1.00      1.00      1.00       133
          41       0.93      0.57      0.71       124
          42       0.96      0.92      0.94       111
          43       1.00      1.00      1.00       124
          44       1.00      1.00      1.00       117
          45       0.98      0.48      0.65       126
          46       0.38      0.78      0.51       119
          47       1.00      0.98      0.99       115

    accuracy                           0.95      6288
   macro avg       0.96      0.95      0.95      6288
weighted avg       0.96      0.95      0.95      6288


 Confusion report:
 [[649   0   0 ...   0   0   0]
 [  0 105   0 ...   0   0   0]
 [  1   0 103 ...   0  11   0]
 ...
 [  3   4   2 ...  61  34   0]
 [  7   0   0 ...   0  93   0]
 [  2   0   0 ...   0   0 113]]
Accuracy Score: 0.9478371501272265


 

Algorithm: XGBClassifier

 Classification report:
               precision    recall  f1-score   support

           0       0.64      0.82      0.71       682
           1       0.85      1.00      0.92       105
           2       0.97      0.76      0.85       117
           3       0.98      1.00      0.99       126
           4       0.87      0.78      0.82       125
           5       0.93      0.85      0.88       117
           6       0.94      0.82      0.88       114
           7       0.97      1.00      0.99       113
           8       0.91      1.00      0.95       127
           9       0.99      1.00      1.00       106
          10       0.96      0.94      0.95       125
          11       0.81      0.39      0.53       131
          12       0.85      0.58      0.69       115
          13       0.99      1.00      1.00       115
          14       0.98      1.00      0.99       118
          15       0.95      1.00      0.97       125
          16       0.92      0.87      0.89       130
          17       0.82      0.93      0.87       130
          18       0.89      0.92      0.90       110
          19       0.97      1.00      0.98       129
          20       0.98      0.99      0.99       127
          21       0.96      0.91      0.94       127
          22       0.72      0.48      0.57       107
          23       0.97      1.00      0.99       102
          24       0.97      0.77      0.86       115
          25       0.94      0.80      0.87       101
          26       0.84      0.83      0.83       112
          27       0.97      1.00      0.98       115
          28       1.00      1.00      1.00       129
          29       0.99      1.00      1.00       124
          30       0.92      0.85      0.88       120
          31       0.97      1.00      0.98       123
          32       0.98      1.00      0.99       131
          33       0.93      0.98      0.96       121
          34       1.00      0.97      0.98       118
          35       0.91      0.81      0.86       113
          36       0.74      0.96      0.83       122
          37       0.98      1.00      0.99       120
          38       0.90      0.63      0.74       109
          39       0.92      1.00      0.96       123
          40       0.99      1.00      1.00       133
          41       0.92      0.55      0.69       124
          42       0.96      0.92      0.94       111
          43       0.98      1.00      0.99       124
          44       0.97      1.00      0.99       117
          45       0.85      0.44      0.58       126
          46       0.35      0.68      0.46       119
          47       0.86      0.63      0.73       115

    accuracy                           0.87      6288
   macro avg       0.91      0.87      0.88      6288
weighted avg       0.89      0.87      0.87      6288


 Confusion report:
 [[556   0   0 ...   0   0   4]
 [  0 105   0 ...   0   0   0]
 [ 12   0  89 ...   0  11   0]
 ...
 [  4   4   2 ...  56  34   0]
 [ 13   0   0 ...   0  81   2]
 [ 29   0   0 ...   0   0  73]]
Accuracy Score: 0.8681615776081425


 

Algorithm: LinearSVC

 Classification report:
               precision    recall  f1-score   support

           0       0.90      0.80      0.85       682
           1       0.86      1.00      0.93       105
           2       0.98      0.88      0.93       117
           3       0.98      0.98      0.98       126
           4       0.97      0.86      0.92       125
           5       0.94      0.95      0.94       117
           6       0.96      1.00      0.98       114
           7       0.99      1.00      1.00       113
           8       0.91      1.00      0.95       127
           9       0.98      1.00      0.99       106
          10       1.00      0.98      0.99       125
          11       0.86      0.85      0.85       131
          12       0.86      0.91      0.89       115
          13       1.00      1.00      1.00       115
          14       1.00      1.00      1.00       118
          15       0.99      1.00      1.00       125
          16       0.95      0.95      0.95       130
          17       0.94      0.99      0.97       130
          18       0.96      1.00      0.98       110
          19       0.93      1.00      0.97       129
          20       0.99      1.00      1.00       127
          21       0.96      0.97      0.96       127
          22       0.88      0.91      0.89       107
          23       0.96      1.00      0.98       102
          24       0.94      0.93      0.93       115
          25       0.99      0.99      0.99       101
          26       0.97      0.97      0.97       112
          27       0.97      1.00      0.99       115
          28       1.00      1.00      1.00       129
          29       0.98      1.00      0.99       124
          30       0.89      0.93      0.91       120
          31       0.99      1.00      1.00       123
          32       0.98      1.00      0.99       131
          33       0.99      1.00      1.00       121
          34       0.98      0.97      0.97       118
          35       0.99      0.81      0.89       113
          36       0.75      0.96      0.84       122
          37       1.00      1.00      1.00       120
          38       0.87      0.65      0.74       109
          39       0.94      0.94      0.94       123
          40       0.99      1.00      0.99       133
          41       0.92      0.56      0.70       124
          42       0.96      0.92      0.94       111
          43       0.98      1.00      0.99       124
          44       0.99      1.00      1.00       117
          45       0.84      0.44      0.58       126
          46       0.39      0.82      0.53       119
          47       0.93      0.98      0.96       115

    accuracy                           0.92      6288
   macro avg       0.94      0.94      0.93      6288
weighted avg       0.93      0.92      0.92      6288


 Confusion report:
 [[546   0   0 ...   0   0   5]
 [  0 105   0 ...   0   0   0]
 [  1   0 103 ...   0  11   0]
 ...
 [  1   4   2 ...  56  34   1]
 [  4   0   0 ...   0  97   0]
 [  2   0   0 ...   0   0 113]]
Accuracy Score: 0.9233460559796438


 

Algorithm: KNeighborsClassifier

 Classification report:
               precision    recall  f1-score   support

           0       0.76      0.75      0.76       682
           1       0.83      1.00      0.91       105
           2       0.34      0.91      0.49       117
           3       0.92      1.00      0.96       126
           4       0.95      0.64      0.77       125
           5       0.92      0.85      0.88       117
           6       0.94      0.91      0.92       114
           7       0.96      1.00      0.98       113
           8       0.91      0.98      0.95       127
           9       0.96      1.00      0.98       106
          10       0.89      0.93      0.91       125
          11       0.76      0.54      0.63       131
          12       0.86      0.62      0.72       115
          13       0.97      1.00      0.98       115
          14       0.99      1.00      1.00       118
          15       0.99      1.00      1.00       125
          16       0.93      0.88      0.90       130
          17       0.93      0.93      0.93       130
          18       0.97      0.98      0.98       110
          19       0.91      1.00      0.96       129
          20       0.94      1.00      0.97       127
          21       0.90      0.94      0.92       127
          22       0.84      0.66      0.74       107
          23       0.93      1.00      0.96       102
          24       0.83      0.97      0.90       115
          25       0.90      0.94      0.92       101
          26       0.90      0.99      0.94       112
          27       0.97      1.00      0.98       115
          28       1.00      1.00      1.00       129
          29       0.93      1.00      0.96       124
          30       0.84      0.95      0.89       120
          31       0.98      1.00      0.99       123
          32       0.96      1.00      0.98       131
          33       0.95      1.00      0.98       121
          34       1.00      0.97      0.98       118
          35       0.98      0.81      0.89       113
          36       0.98      0.79      0.87       122
          37       0.94      1.00      0.97       120
          38       0.73      0.57      0.64       109
          39       0.91      1.00      0.95       123
          40       0.99      1.00      0.99       133
          41       0.87      0.48      0.62       124
          42       0.96      0.92      0.94       111
          43       0.95      1.00      0.97       124
          44       0.98      1.00      0.99       117
          45       0.80      0.36      0.49       126
          46       0.51      0.34      0.41       119
          47       0.94      0.94      0.94       115

    accuracy                           0.87      6288
   macro avg       0.90      0.89      0.88      6288
weighted avg       0.89      0.87      0.87      6288


 Confusion report:
 [[512   0   3 ...   0   1   4]
 [  0 105   0 ...   0   0   0]
 [  2   0 107 ...   0   2   0]
 ...
 [  1   4  36 ...  45   5   0]
 [ 10   0  59 ...   0  40   1]
 [  7   0   0 ...   0   0 108]]
Accuracy Score: 0.8740458015267175


 

Algorithm: MultinomialNB

 Classification report:
               precision    recall  f1-score   support

           0       0.31      0.98      0.47       682
           1       0.95      0.77      0.85       105
           2       0.97      0.53      0.69       117
           3       1.00      0.91      0.95       126
           4       0.85      0.38      0.52       125
           5       0.94      0.68      0.79       117
           6       0.96      0.46      0.62       114
           7       0.96      0.83      0.89       113
           8       0.98      0.45      0.62       127
           9       0.98      1.00      0.99       106
          10       0.79      0.70      0.74       125
          11       1.00      0.12      0.22       131
          12       0.74      0.28      0.41       115
          13       0.97      0.83      0.90       115
          14       0.98      1.00      0.99       118
          15       1.00      0.84      0.91       125
          16       1.00      0.63      0.77       130
          17       0.98      0.50      0.66       130
          18       1.00      0.57      0.73       110
          19       0.96      0.40      0.56       129
          20       0.95      0.68      0.79       127
          21       0.96      0.76      0.85       127
          22       0.89      0.16      0.27       107
          23       1.00      0.58      0.73       102
          24       0.86      0.26      0.40       115
          25       1.00      0.55      0.71       101
          26       1.00      0.62      0.77       112
          27       0.99      0.83      0.91       115
          28       0.98      1.00      0.99       129
          29       0.77      0.84      0.80       124
          30       0.84      0.57      0.68       120
          31       1.00      0.71      0.83       123
          32       1.00      0.84      0.91       131
          33       0.96      0.92      0.94       121
          34       0.97      0.97      0.97       118
          35       0.97      0.69      0.81       113
          36       0.97      0.79      0.87       122
          37       0.99      0.58      0.73       120
          38       0.59      0.69      0.64       109
          39       0.83      0.82      0.82       123
          40       1.00      0.92      0.96       133
          41       0.90      0.37      0.53       124
          42       0.93      0.92      0.92       111
          43       0.99      0.88      0.93       124
          44       1.00      0.72      0.84       117
          45       0.78      0.32      0.45       126
          46       0.30      0.70      0.42       119
          47       0.84      0.32      0.47       115

    accuracy                           0.69      6288
   macro avg       0.91      0.66      0.73      6288
weighted avg       0.86      0.69      0.71      6288


 Confusion report:
 [[669   0   0 ...   0   0   0]
 [  2  81   0 ...   0   0   0]
 [ 32   0  62 ...   0  16   0]
 ...
 [  2   1   2 ...  40  39   0]
 [ 33   0   0 ...   0  83   0]
 [ 69   0   0 ...   0   0  37]]
Accuracy Score: 0.6925890585241731


 

In [ ]:
result_ML_US= pd.DataFrame(np.array(list(result.values()))[:,:-1],    # make a dataframe out of the metrics from result dictionary 
                       columns= ['accuracy_training','accuracy_test',
                                 'recallscore_training', 'recallscore_test', 
                                 'precision_training','precision_test',
                                 'f1score_training', 'f1score_test', 
                                 'Elapsed'],
                      index= result.keys())   # use the model names as index

result_ML_US.index.name = 'Model'   # name the index of the result1 dataframe as 'Model'

result_ML_US
Out[ ]:
accuracy_training accuracy_test recallscore_training recallscore_test precision_training precision_test f1score_training f1score_test Elapsed
Model
Random Forest 95.9919 94.7837 95.9919 94.7837 97.5637 96.1177 96.293 94.9563 27.186
Xgboost 89.6099 86.8162 89.6099 86.8162 91.5836 88.524 89.8043 86.7776 172.623
SVC 94.9223 92.3346 94.9223 92.3346 96.4115 93.464 95.2116 92.4131 2.78671
KNN 91.1686 87.4046 91.1686 87.4046 92.9089 88.6808 91.3715 87.2089 19.8487
Naive Bayes 72.655 69.2589 72.655 69.2589 86.9918 85.5484 74.34 71.0507 1.15696
In [ ]:
result_ML_US.to_excel('result_ML_US.xlsx')

ML model with upsampling has overcome the problem of overfitting. We will do randomised gridsearch on this for hypertuning.

In [ ]:
from sklearn.model_selection import train_test_split, KFold, GridSearchCV, RandomizedSearchCV,cross_val_score, RepeatedStratifiedKFold,  StratifiedKFold
In [ ]:
# Create training and test datasets with 80:20 ratio without augmenatation
X_train, X_test, y_train, y_test = train_test_split(dataset_ML_upsampled.combined_description, 
                                                    dataset_ML_upsampled.target, 
                                                    test_size=0.20, 
                                                    random_state=42)
print('\033[1mShape of the training set:\033[0m', X_train.shape, y_train.shape)
print('\033[1mShape of the test set:\033[0m', X_test.shape, y_test.shape)
Shape of the training set: (25149,) (25149,)
Shape of the test set: (6288,) (6288,)

Since, Random Forest gave highest accuracy, precision and recall among all ML Models, we will hyper tune Random Forest Model

11.4 HYPER TUNING RANDOM FOREST

In [ ]:
pipeline = Pipeline([
          ('vect', CountVectorizer()),
         ('tfidf', TfidfTransformer()),
         ('clf', RandomForestClassifier()),
         ])


parameters = {'vect__ngram_range': [(1, 1), (1, 2)],
              'tfidf__use_idf': (True, False),
              'clf__bootstrap': [True],
              'clf__max_depth': [ None, 10,20,30, 40,50],
              'clf__max_features': ['auto', 'sqrt'],
              'clf__min_samples_leaf': [None,1,2, 4, 8,10],
              'clf__n_estimators': [100]}


if __name__ == "__main__":
  grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1,cv=5)
  print("Performing grid search...")
  print("pipeline:", [name for name, _ in pipeline.steps])
  print("parameters:")
  print(parameters)
 
  RF_CV_Fit = grid_search.fit(X_train, y_train)
  #print("done in %0.3fs" % (time() - t0))
  print()
  
  print("Best score: %0.3f" % RF_CV_Fit.best_score_)
  print("Best parameters set:")
  best_parameters = RF_CV_Fit.best_estimator_.get_params()
  for param_name in sorted(parameters.keys()):
    print("\t%s: %r" % (param_name, best_parameters[param_name]))

  #print(RF_CV_Fit.cv_results_)
Performing grid search...
pipeline: ['vect', 'tfidf', 'clf']
parameters:
{'vect__ngram_range': [(1, 1), (1, 2)], 'tfidf__use_idf': (True, False), 'clf__bootstrap': [True], 'clf__max_depth': [None, 10, 20, 30, 40, 50], 'clf__max_features': ['auto', 'sqrt'], 'clf__min_samples_leaf': [None, 1, 2, 4, 8, 10], 'clf__n_estimators': [100]}
Fitting 5 folds for each of 288 candidates, totalling 1440 fits

Best score: 0.940
Best parameters set:
	clf__bootstrap: True
	clf__max_depth: None
	clf__max_features: 'auto'
	clf__min_samples_leaf: 1
	clf__n_estimators: 100
	tfidf__use_idf: False
	vect__ngram_range: (1, 2)
{'mean_fit_time': array([ 0.74865465,  1.67359405,  0.78619723,  1.71098561, 28.84672332,
       74.43511553, 28.65673614, 74.95073571, 12.51527905, 28.4268507 ,
       12.27425117, 27.6733058 ,  8.93519526, 19.98553562,  8.57288623,
       18.65879254,  6.20619192, 12.49336271,  6.05820575, 11.6839551 ,
        5.48375521, 10.15132422,  5.22009912,  9.57310047,  0.71890039,
        1.59900298,  0.70906501,  1.53463111, 28.19437113, 73.53564353,
       28.45340581, 74.39056029, 12.50757804, 28.59056473, 12.24924459,
       27.55002136,  8.96006808, 20.44033871,  8.88434482, 19.18388901,
        6.28745747, 12.41598473,  5.93432112, 11.82889876,  5.37331023,
       10.24623966,  5.18876982,  9.47824478,  0.72241559,  1.55671353,
        0.71301546,  1.54696302,  1.92313251,  3.86300569,  1.8663662 ,
        3.70041995,  1.83289566,  3.51556435,  1.79271007,  3.41325021,
        1.78196373,  3.43513322,  1.75588279,  3.18577647,  1.69742761,
        3.06614847,  1.64262142,  2.85674233,  1.65307078,  2.9838304 ,
        1.60914445,  2.84149714,  0.72946095,  1.59941869,  0.70053153,
        1.54269614,  1.89417653,  3.82370009,  1.86840577,  3.81653309,
        1.84801855,  3.50327826,  1.79869022,  3.33255382,  1.783847  ,
        3.2903389 ,  1.72100081,  3.20338869,  1.69526324,  3.06174726,
        1.64144373,  2.97624521,  1.65033965,  2.98666601,  1.61865416,
        2.84738793,  0.7411293 ,  1.57871194,  0.71843338,  1.5368516 ,
        3.31339674,  7.25809703,  3.24995465,  7.28937998,  2.73099666,
        5.6107955 ,  2.67866821,  5.41507111,  2.41513276,  4.71098046,
        2.37909622,  4.68626432,  2.19811516,  4.09786935,  2.11141953,
        3.8570509 ,  2.10084615,  3.85793934,  2.05467052,  3.58279371,
        0.71808348,  1.5817143 ,  0.72614155,  1.56758299,  3.30709276,
        7.21715732,  3.31651058,  7.10318422,  2.68700123,  5.62322221,
        2.61382804,  5.26819592,  2.45912228,  4.79342766,  2.39685102,
        4.63982019,  2.21131549,  4.14284072,  2.09695797,  3.86904602,
        2.12517862,  3.99056211,  2.0823616 ,  3.60800996,  0.72397099,
        1.56097713,  0.70280743,  1.57423868,  5.20177174, 11.91174912,
        5.44017959, 12.19463372,  3.8754199 ,  7.72400246,  3.51137195,
        7.02639084,  3.1018209 ,  6.2854919 ,  3.07712321,  5.77850409,
        2.67542133,  5.05707617,  2.57830796,  4.77005129,  2.53588719,
        4.66016841,  2.45593972,  4.37218924,  0.72195497,  1.59410892,
        0.71142545,  1.55294938,  5.0540585 , 11.12135382,  5.09347444,
       11.60119796,  3.58049622,  7.36389232,  3.60819907,  7.20581889,
        3.13170476,  6.24391212,  2.97308483,  5.92095857,  2.67269802,
        5.15381861,  2.53880358,  4.74008875,  2.51219254,  4.58219123,
        2.43650932,  4.28098965,  0.69666305,  1.59573941,  0.70859499,
        1.52367964,  6.99319191, 16.03008704,  7.12201729, 16.3987009 ,
        4.58872051,  9.18937101,  4.51059747,  9.07230463,  3.78599925,
        7.32772226,  3.63548522,  7.31311622,  3.07652631,  5.80219111,
        3.0399158 ,  5.3717329 ,  2.90202227,  5.39125085,  2.79718456,
        4.81568584,  0.71149821,  1.58169823,  0.70747957,  1.53541603,
        6.92320552, 15.50614305,  7.07308512, 16.38766866,  4.4823319 ,
        9.20005755,  4.49066153,  9.05438838,  3.68280725,  7.56535873,
        3.76380315,  7.02407527,  3.14987779,  5.87420092,  3.05433068,
        5.62463799,  3.07998056,  5.53088722,  2.93298221,  5.2658524 ,
        0.74752231,  1.6609797 ,  0.75044217,  1.59811211,  9.31246452,
       20.71321464,  9.64812555, 21.42072363,  5.54857192, 11.05878668,
        5.54812474, 10.429318  ,  4.61017103,  8.86301422,  4.3580833 ,
        8.43770051,  3.57051225,  6.58561659,  3.42614641,  6.05101657,
        3.30378647,  6.04251776,  3.15684419,  5.46018109,  0.71179409,
        1.57890916,  0.71118984,  1.5649951 ,  9.22111764, 19.8063663 ,
        9.3723186 , 20.87417464,  5.54048057, 10.85517969,  5.32095857,
       10.3319078 ,  4.2747376 ,  8.53790383,  4.22082543,  8.21901026,
        3.50546894,  6.56115551,  3.37665086,  6.31649132,  3.33025856,
        6.01504412,  3.18776836,  4.97431712]), 'std_fit_time': array([0.00677019, 0.06386226, 0.04527634, 0.07072073, 0.28221089,
       1.19312975, 0.28698746, 1.71485535, 0.10929369, 0.34486095,
       0.1447365 , 0.71449514, 0.09424901, 0.49824093, 0.14682552,
       0.42833857, 0.09929544, 0.61847604, 0.14733316, 0.31553517,
       0.06346608, 0.41682018, 0.07702446, 0.21134127, 0.01231824,
       0.03129927, 0.0158867 , 0.02355611, 0.25379178, 1.15676748,
       0.3762076 , 0.89438525, 0.15704412, 0.44528091, 0.11967457,
       0.41971989, 0.15286961, 0.55463806, 0.21722644, 0.50013477,
       0.10027718, 0.24939877, 0.14130259, 0.60246919, 0.06053333,
       0.33676793, 0.0987412 , 0.24885977, 0.01694365, 0.01709234,
       0.00830146, 0.00472063, 0.03154448, 0.04504038, 0.01792408,
       0.16145929, 0.02574718, 0.11104763, 0.03943556, 0.08791603,
       0.01407478, 0.08435388, 0.03961997, 0.06675408, 0.02512789,
       0.09838365, 0.02401506, 0.06866354, 0.0115158 , 0.06742556,
       0.01705267, 0.05210687, 0.00760608, 0.014114  , 0.00549436,
       0.01963926, 0.03576583, 0.07789438, 0.04543754, 0.06470698,
       0.02990583, 0.11132169, 0.0262406 , 0.08578177, 0.03232067,
       0.06573557, 0.02866703, 0.06343333, 0.02840561, 0.07381297,
       0.02449677, 0.03796616, 0.02552831, 0.07178885, 0.01459444,
       0.0295898 , 0.01509954, 0.01826214, 0.01399789, 0.01253613,
       0.0872423 , 0.2961791 , 0.07891186, 0.27742812, 0.04444076,
       0.17006423, 0.03622079, 0.16761031, 0.02566227, 0.06817065,
       0.02361126, 0.15276086, 0.02662244, 0.16237332, 0.02291128,
       0.18325973, 0.03331705, 0.09653657, 0.01912771, 0.07498117,
       0.01476786, 0.03468836, 0.01087165, 0.01488579, 0.0875194 ,
       0.1832279 , 0.04284823, 0.06062795, 0.04408602, 0.22516767,
       0.02781321, 0.13785492, 0.03883219, 0.23706672, 0.03669035,
       0.07066623, 0.04345885, 0.10256493, 0.02416704, 0.15866134,
       0.02014655, 0.12957193, 0.02979499, 0.16873343, 0.00756282,
       0.02253611, 0.01182673, 0.03925411, 0.07968467, 0.25911894,
       0.16961541, 0.24694481, 0.12524727, 0.17178433, 0.02514362,
       0.30239499, 0.0398199 , 0.05808255, 0.02149936, 0.17136384,
       0.02433784, 0.25679156, 0.04677217, 0.16924676, 0.04563828,
       0.21957671, 0.04656614, 0.08852195, 0.00710362, 0.01544826,
       0.01309847, 0.01153317, 0.0496615 , 0.25984075, 0.12180628,
       0.38035997, 0.07722204, 0.15984755, 0.10369607, 0.16814261,
       0.06824514, 0.21252785, 0.06640135, 0.11808101, 0.05291058,
       0.05658342, 0.05808262, 0.1253986 , 0.04839842, 0.132792  ,
       0.03820883, 0.11422665, 0.0075629 , 0.02702227, 0.00645293,
       0.00508637, 0.1079216 , 0.28378194, 0.07271764, 0.57057236,
       0.10927485, 0.17554412, 0.08539411, 0.29834441, 0.08173247,
       0.33825706, 0.0542879 , 0.2756044 , 0.03003532, 0.10765271,
       0.07018513, 0.11887673, 0.02374822, 0.2616053 , 0.02396916,
       0.11526678, 0.00913188, 0.0248376 , 0.00541302, 0.0052289 ,
       0.15808082, 0.70920235, 0.08081056, 0.44986774, 0.07119139,
       0.27899983, 0.05117806, 0.45502575, 0.05096991, 0.25564872,
       0.07108788, 0.27743368, 0.04075918, 0.18277381, 0.04668657,
       0.13982941, 0.05608458, 0.09380994, 0.0400004 , 0.12418816,
       0.01029164, 0.00782285, 0.01720947, 0.03825913, 0.19868815,
       0.23887606, 0.08455585, 0.5399584 , 0.1432047 , 0.27677753,
       0.06447403, 0.3921789 , 0.06176158, 0.33106153, 0.09108888,
       0.33727951, 0.09015937, 0.18474449, 0.03108869, 0.23008846,
       0.04866655, 0.22760617, 0.04506829, 0.14946669, 0.01289616,
       0.02717819, 0.01495169, 0.02421647, 0.14352156, 0.48619491,
       0.29978173, 0.51780552, 0.12848978, 0.27424598, 0.11399073,
       0.43768064, 0.07249213, 0.35597712, 0.11419258, 0.2447817 ,
       0.04715626, 0.23554689, 0.03563735, 0.12472853, 0.06420637,
       0.16051862, 0.04613815, 0.75606477]), 'mean_score_time': array([0.        , 0.        , 0.        , 0.        , 0.52944741,
       0.81832857, 0.51587024, 0.79588866, 0.54002819, 0.76662169,
       0.52520518, 0.7440258 , 0.45564284, 0.73551655, 0.44898543,
       0.71316295, 0.43960414, 0.69799385, 0.43125734, 0.68264832,
       0.42674656, 0.68974614, 0.43111963, 0.67849469, 0.        ,
       0.        , 0.        , 0.        , 0.53875694, 0.824299  ,
       0.51956811, 0.80705585, 0.53300953, 0.76056709, 0.50529327,
       0.74954333, 0.4569459 , 0.76901197, 0.45858693, 0.71991191,
       0.44545379, 0.68917193, 0.43638124, 0.68374443, 0.42453413,
       0.67568965, 0.42457428, 0.66173391, 0.        , 0.        ,
       0.        , 0.        , 0.3224359 , 0.54036798, 0.3152864 ,
       0.51070004, 0.3195323 , 0.52891698, 0.32065849, 0.51553531,
       0.32601438, 0.54123974, 0.32263427, 0.51884398, 0.31443887,
       0.52291293, 0.31650882, 0.52166462, 0.31872668, 0.53989425,
       0.31097207, 0.53890128, 0.        , 0.        , 0.        ,
       0.        , 0.32029829, 0.52994785, 0.31784644, 0.54369354,
       0.31757555, 0.53597169, 0.32673264, 0.52532616, 0.32582173,
       0.53800278, 0.31585274, 0.52029629, 0.32032776, 0.52809491,
       0.30975971, 0.53471513, 0.31635413, 0.52556248, 0.31454229,
       0.52781353, 0.        , 0.        , 0.        , 0.        ,
       0.34816923, 0.55936522, 0.33952279, 0.53584208, 0.33766899,
       0.55617127, 0.33990636, 0.52764239, 0.33228512, 0.53686299,
       0.33463578, 0.53996391, 0.33121266, 0.54118261, 0.33416753,
       0.52750549, 0.33135962, 0.53521152, 0.33097124, 0.52595687,
       0.        , 0.        , 0.        , 0.        , 0.34435534,
       0.54547791, 0.33877535, 0.53135629, 0.33777003, 0.54176602,
       0.33663011, 0.52882409, 0.33345761, 0.54350352, 0.34068227,
       0.55211706, 0.3297842 , 0.5379127 , 0.32604275, 0.53160186,
       0.33632655, 0.55027561, 0.3317462 , 0.52531457, 0.        ,
       0.        , 0.        , 0.        , 0.3799912 , 0.57040796,
       0.37834234, 0.58877997, 0.37440634, 0.56242452, 0.3487155 ,
       0.54085555, 0.34721704, 0.54972401, 0.34422121, 0.54605408,
       0.34352193, 0.55106626, 0.34135709, 0.55237308, 0.34911013,
       0.56154122, 0.34805245, 0.55239053, 0.        , 0.        ,
       0.        , 0.        , 0.36709218, 0.55127926, 0.36121922,
       0.54074683, 0.34482231, 0.56767855, 0.35465703, 0.54195452,
       0.35406408, 0.54735436, 0.34139357, 0.54019995, 0.35130348,
       0.5559484 , 0.34476061, 0.54456167, 0.34408851, 0.55688138,
       0.34374137, 0.54189792, 0.        , 0.        , 0.        ,
       0.        , 0.39586163, 0.58201356, 0.37525382, 0.57167926,
       0.36703453, 0.59508395, 0.38026032, 0.55234122, 0.36001849,
       0.56497617, 0.35573077, 0.55307698, 0.35625072, 0.56947808,
       0.36142125, 0.55209641, 0.35644617, 0.56230412, 0.35262361,
       0.55776858, 0.        , 0.        , 0.        , 0.        ,
       0.39267654, 0.57007632, 0.38456063, 0.56675811, 0.3651865 ,
       0.56667128, 0.35862446, 0.56466331, 0.35820289, 0.57342157,
       0.37318501, 0.561551  , 0.35821342, 0.57744932, 0.36343546,
       0.56140466, 0.37493229, 0.57958937, 0.36406293, 0.57660756,
       0.        , 0.        , 0.        , 0.        , 0.43004136,
       0.61034279, 0.52905564, 0.59611063, 0.39341035, 0.60581489,
       0.3869256 , 0.58549781, 0.39125648, 0.59878435, 0.37985015,
       0.58759794, 0.37050509, 0.59441099, 0.37224426, 0.57598   ,
       0.37297587, 0.58499026, 0.3624064 , 0.57051105, 0.        ,
       0.        , 0.        , 0.        , 0.45945239, 0.58947587,
       0.38413792, 0.58612041, 0.38939066, 0.58763804, 0.37813768,
       0.57289009, 0.36994052, 0.5834888 , 0.37023015, 0.58190961,
       0.37093053, 0.60065966, 0.36773443, 0.5722301 , 0.37287259,
       0.59010072, 0.36537051, 0.50184507]), 'std_score_time': array([0.        , 0.        , 0.        , 0.        , 0.00574046,
       0.00996226, 0.00911676, 0.00830513, 0.05682813, 0.01190702,
       0.05887463, 0.0031939 , 0.00767629, 0.01254886, 0.00484923,
       0.00878266, 0.00919858, 0.01109556, 0.00297663, 0.01102942,
       0.00522813, 0.01927844, 0.01438783, 0.01442163, 0.        ,
       0.        , 0.        , 0.        , 0.0183946 , 0.01054017,
       0.01344818, 0.00673492, 0.06346812, 0.00699778, 0.04835864,
       0.00986099, 0.00988103, 0.0189616 , 0.00970374, 0.01866024,
       0.01645249, 0.00937852, 0.01735412, 0.00810002, 0.00486003,
       0.00796878, 0.00858382, 0.01095289, 0.        , 0.        ,
       0.        , 0.        , 0.00503074, 0.00919593, 0.0090687 ,
       0.00858131, 0.00694009, 0.02750943, 0.01774869, 0.01696162,
       0.00632595, 0.01114209, 0.01636303, 0.0068118 , 0.0064641 ,
       0.00270939, 0.01058092, 0.0076899 , 0.00576521, 0.02057988,
       0.00625975, 0.02131623, 0.        , 0.        , 0.        ,
       0.        , 0.00591386, 0.00800377, 0.00573918, 0.03353082,
       0.00640322, 0.00967699, 0.01706484, 0.02071539, 0.00841749,
       0.01282622, 0.00692347, 0.00918773, 0.00475145, 0.00768886,
       0.00674526, 0.02100675, 0.00331355, 0.01735921, 0.00372175,
       0.01794949, 0.        , 0.        , 0.        , 0.        ,
       0.0067348 , 0.01787651, 0.00766494, 0.00697893, 0.00893757,
       0.01481132, 0.00887656, 0.00815059, 0.00879903, 0.01869945,
       0.00467404, 0.00124385, 0.00444483, 0.01024947, 0.00610433,
       0.00967368, 0.00394064, 0.00801243, 0.0067243 , 0.00996656,
       0.        , 0.        , 0.        , 0.        , 0.0053351 ,
       0.01415724, 0.00758157, 0.01047729, 0.00473373, 0.01129209,
       0.00627269, 0.00684544, 0.00466174, 0.01530592, 0.00938365,
       0.01155852, 0.00512169, 0.01054835, 0.00634658, 0.0119283 ,
       0.00863516, 0.00734251, 0.00570408, 0.00901754, 0.        ,
       0.        , 0.        , 0.        , 0.01609053, 0.01645626,
       0.00947449, 0.02375962, 0.01354133, 0.00511089, 0.00522718,
       0.00982793, 0.0074237 , 0.0094136 , 0.00296161, 0.00815446,
       0.00693228, 0.01393988, 0.00557115, 0.01558844, 0.00841083,
       0.01719904, 0.00883965, 0.00982001, 0.        , 0.        ,
       0.        , 0.        , 0.00732061, 0.00398646, 0.01773712,
       0.0089479 , 0.00535237, 0.02711099, 0.01197502, 0.00721375,
       0.01056827, 0.00627763, 0.00689437, 0.00480517, 0.00934751,
       0.00785526, 0.00609003, 0.00832419, 0.00817891, 0.00736843,
       0.01106606, 0.01988374, 0.        , 0.        , 0.        ,
       0.        , 0.01970489, 0.01295416, 0.01163508, 0.02303635,
       0.00566911, 0.0141395 , 0.01026745, 0.01048619, 0.00495164,
       0.01561057, 0.00663969, 0.01553047, 0.00674538, 0.00837623,
       0.01568875, 0.00928331, 0.00525063, 0.01319629, 0.00762216,
       0.00991823, 0.        , 0.        , 0.        , 0.        ,
       0.01922066, 0.00751795, 0.01064929, 0.00865794, 0.01455189,
       0.0049881 , 0.00237571, 0.02059251, 0.00845767, 0.01732665,
       0.01901882, 0.00582196, 0.00679029, 0.02092851, 0.01003664,
       0.0232208 , 0.00881776, 0.01417224, 0.00911785, 0.00468594,
       0.        , 0.        , 0.        , 0.        , 0.04578838,
       0.00491282, 0.06613487, 0.01101426, 0.00897531, 0.01714909,
       0.00893397, 0.01542346, 0.00509738, 0.00965718, 0.0032588 ,
       0.00689498, 0.00619565, 0.01669214, 0.00771474, 0.01011787,
       0.00861974, 0.00772079, 0.0069369 , 0.00365757, 0.        ,
       0.        , 0.        , 0.        , 0.07240702, 0.01439435,
       0.00920749, 0.01393845, 0.00286916, 0.01216078, 0.01095711,
       0.00975142, 0.00879761, 0.01702413, 0.00463842, 0.01963843,
       0.01276748, 0.02083632, 0.00827729, 0.00870931, 0.00536894,
       0.0093814 , 0.00698553, 0.1092182 ]), 'param_clf__bootstrap': masked_array(data=[True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True,
                   True, True, True, True, True, True, True, True, True],
             mask=[False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False],
       fill_value='?',
            dtype=object), 'param_clf__max_depth': masked_array(data=[None, None, None, None, None, None, None, None, None,
                   None, None, None, None, None, None, None, None, None,
                   None, None, None, None, None, None, None, None, None,
                   None, None, None, None, None, None, None, None, None,
                   None, None, None, None, None, None, None, None, None,
                   None, None, None, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                   10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                   10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                   10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20,
                   20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
                   20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
                   20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
                   20, 20, 20, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                   30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                   30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                   30, 30, 30, 30, 30, 30, 30, 30, 30, 40, 40, 40, 40, 40,
                   40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
                   40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
                   40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
                   40, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
                   50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
                   50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
                   50, 50, 50, 50, 50, 50, 50],
             mask=[False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False],
       fill_value='?',
            dtype=object), 'param_clf__max_features': masked_array(data=['auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'auto', 'auto',
                   'auto', 'auto', 'auto', 'auto', 'auto', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt', 'sqrt',
                   'sqrt'],
             mask=[False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False],
       fill_value='?',
            dtype=object), 'param_clf__min_samples_leaf': masked_array(data=[None, None, None, None, 1, 1, 1, 1, 2, 2, 2, 2, 4, 4,
                   4, 4, 8, 8, 8, 8, 10, 10, 10, 10, None, None, None,
                   None, 1, 1, 1, 1, 2, 2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8,
                   10, 10, 10, 10, None, None, None, None, 1, 1, 1, 1, 2,
                   2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 10, 10, 10, 10, None,
                   None, None, None, 1, 1, 1, 1, 2, 2, 2, 2, 4, 4, 4, 4,
                   8, 8, 8, 8, 10, 10, 10, 10, None, None, None, None, 1,
                   1, 1, 1, 2, 2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 10, 10,
                   10, 10, None, None, None, None, 1, 1, 1, 1, 2, 2, 2, 2,
                   4, 4, 4, 4, 8, 8, 8, 8, 10, 10, 10, 10, None, None,
                   None, None, 1, 1, 1, 1, 2, 2, 2, 2, 4, 4, 4, 4, 8, 8,
                   8, 8, 10, 10, 10, 10, None, None, None, None, 1, 1, 1,
                   1, 2, 2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 10, 10, 10, 10,
                   None, None, None, None, 1, 1, 1, 1, 2, 2, 2, 2, 4, 4,
                   4, 4, 8, 8, 8, 8, 10, 10, 10, 10, None, None, None,
                   None, 1, 1, 1, 1, 2, 2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8,
                   10, 10, 10, 10, None, None, None, None, 1, 1, 1, 1, 2,
                   2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 10, 10, 10, 10, None,
                   None, None, None, 1, 1, 1, 1, 2, 2, 2, 2, 4, 4, 4, 4,
                   8, 8, 8, 8, 10, 10, 10, 10],
             mask=[False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False],
       fill_value='?',
            dtype=object), 'param_clf__n_estimators': masked_array(data=[100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
                   100, 100],
             mask=[False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False],
       fill_value='?',
            dtype=object), 'param_tfidf__use_idf': masked_array(data=[True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False,
                   True, True, False, False, True, True, False, False],
             mask=[False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False],
       fill_value='?',
            dtype=object), 'param_vect__ngram_range': masked_array(data=[(1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2),
                   (1, 1), (1, 2), (1, 1), (1, 2), (1, 1), (1, 2), (1, 1),
                   (1, 2)],
             mask=[False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False,
                   False, False, False, False, False, False, False, False],
       fill_value='?',
            dtype=object), 'params': [{'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': None, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 10, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 20, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 30, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 40, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': None, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 1, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 4, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 8, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': True, 'vect__ngram_range': (1, 2)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 1)}, {'clf__bootstrap': True, 'clf__max_depth': 50, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 10, 'clf__n_estimators': 100, 'tfidf__use_idf': False, 'vect__ngram_range': (1, 2)}], 'split0_test_score': array([       nan,        nan,        nan,        nan, 0.93300199,
       0.93419483, 0.93220676, 0.93439364, 0.87992048, 0.87813121,
       0.87813121, 0.87872763, 0.81371769, 0.80477137, 0.81073559,
       0.79801193, 0.73717694, 0.71809145, 0.74015905, 0.71530815,
       0.71153082, 0.68707753, 0.71351889, 0.68866799,        nan,
              nan,        nan,        nan, 0.93399602, 0.9333996 ,
       0.93359841, 0.93359841, 0.88151093, 0.88210736, 0.88131213,
       0.88230616, 0.81510934, 0.80417495, 0.81033797, 0.80417495,
       0.73558648, 0.72445328, 0.73021869, 0.7194831 , 0.71172962,
       0.68906561, 0.70516899, 0.68210736,        nan,        nan,
              nan,        nan, 0.29403579, 0.23280318, 0.29662028,
       0.24970179, 0.30974155, 0.2499006 , 0.29184891, 0.2445328 ,
       0.28250497, 0.23499006, 0.27514911, 0.22246521, 0.25387674,
       0.22763419, 0.25427435, 0.23717694, 0.25884692, 0.22445328,
       0.2610338 , 0.22763419,        nan,        nan,        nan,
              nan, 0.29840954, 0.24413519, 0.30357853, 0.24652087,
       0.272167  , 0.2359841 , 0.29940358, 0.23101392, 0.26799205,
       0.23836978, 0.2833002 , 0.2473161 , 0.25347913, 0.22306163,
       0.26163022, 0.23161034, 0.25765408, 0.22286282, 0.26262425,
       0.22862823,        nan,        nan,        nan,        nan,
       0.53856859, 0.38528827, 0.5332008 , 0.38926441, 0.49582505,
       0.36759443, 0.49125249, 0.37753479, 0.42743539, 0.3499006 ,
       0.44532803, 0.36282306, 0.39960239, 0.30994036, 0.3972167 ,
       0.33081511, 0.39284294, 0.31968191, 0.38727634, 0.33777336,
              nan,        nan,        nan,        nan, 0.5499006 ,
       0.39284294, 0.53638171, 0.3944334 , 0.49602386, 0.37614314,
       0.48429423, 0.3695825 , 0.4584493 , 0.34254473, 0.44910537,
       0.35109344, 0.40318091, 0.32445328, 0.39741551, 0.33677932,
       0.39204771, 0.30675944, 0.37952286, 0.32246521,        nan,
              nan,        nan,        nan, 0.68588469, 0.51192843,
       0.69363817, 0.51908549, 0.59880716, 0.46858847, 0.61590457,
       0.48071571, 0.54970179, 0.44194831, 0.55467197, 0.43757455,
       0.49125249, 0.39045726, 0.49642147, 0.39125249, 0.47972167,
       0.38687873, 0.47395626, 0.39005964,        nan,        nan,
              nan,        nan, 0.67852883, 0.51431412, 0.69125249,
       0.53081511, 0.60775348, 0.47117296, 0.62664016, 0.49304175,
       0.55188867, 0.43081511, 0.55407555, 0.44930417, 0.48846918,
       0.40397614, 0.48727634, 0.38846918, 0.4666004 , 0.38926441,
       0.47375746, 0.39264414,        nan,        nan,        nan,
              nan, 0.76222664, 0.62882704, 0.77574553, 0.63916501,
       0.68151093, 0.55248509, 0.68807157, 0.55666004, 0.61232604,
       0.50198807, 0.63280318, 0.5111332 , 0.54294235, 0.46640159,
       0.5612326 , 0.45964215, 0.52942346, 0.43638171, 0.52683897,
       0.43538767,        nan,        nan,        nan,        nan,
       0.76858847, 0.61013917, 0.77574553, 0.62803181, 0.67833002,
       0.53161034, 0.69642147, 0.56322068, 0.61371769, 0.50854871,
       0.62902584, 0.51928429, 0.54294235, 0.44811133, 0.55168986,
       0.46421471, 0.53141153, 0.4445328 , 0.52584493, 0.44373757,
              nan,        nan,        nan,        nan, 0.81709742,
       0.69920477, 0.82326044, 0.7111332 , 0.73021869, 0.60735586,
       0.73300199, 0.62604374, 0.65129225, 0.5612326 , 0.65904573,
       0.5526839 , 0.59105368, 0.49105368, 0.59085487, 0.48031809,
       0.57017893, 0.47395626, 0.57236581, 0.48369781,        nan,
              nan,        nan,        nan, 0.81192843, 0.68926441,
       0.82524851, 0.70497018, 0.72027833, 0.61709742, 0.73638171,
       0.61689861, 0.6584493 , 0.54930417, 0.65606362, 0.54950298,
       0.59085487, 0.47952286, 0.58568588, 0.49980119, 0.56063618,
       0.48270378, 0.57157058, 0.47296223]), 'split1_test_score': array([       nan,        nan,        nan,        nan, 0.9415507 ,
       0.94353877, 0.94393638, 0.94333996, 0.88230616, 0.87892644,
       0.88131213, 0.87892644, 0.81451292, 0.80437376, 0.81351889,
       0.80735586, 0.7387674 , 0.71530815, 0.73141153, 0.72166998,
       0.71570577, 0.6888668 , 0.71530815, 0.68290258,        nan,
              nan,        nan,        nan, 0.94254473, 0.94274354,
       0.94254473, 0.94254473, 0.88310139, 0.88111332, 0.88290258,
       0.87833002, 0.81232604, 0.80258449, 0.81332008, 0.80258449,
       0.73936382, 0.71530815, 0.73419483, 0.72027833, 0.71351889,
       0.67912525, 0.70636183, 0.68707753,        nan,        nan,
              nan,        nan, 0.29960239, 0.2389662 , 0.29821074,
       0.24393638, 0.27117296, 0.244334  , 0.29244533, 0.22703777,
       0.26799205, 0.24353877, 0.27912525, 0.23300199, 0.2610338 ,
       0.22345924, 0.25944334, 0.22286282, 0.2473161 , 0.22266402,
       0.2612326 , 0.21968191,        nan,        nan,        nan,
              nan, 0.30357853, 0.24393638, 0.2888668 , 0.23300199,
       0.25964215, 0.23658052, 0.27355865, 0.24194831, 0.27176938,
       0.22365805, 0.26222664, 0.22306163, 0.24373757, 0.22087475,
       0.2526839 , 0.227833  , 0.25069583, 0.22763419, 0.25328032,
       0.22007952,        nan,        nan,        nan,        nan,
       0.54015905, 0.38489066, 0.54075547, 0.40536779, 0.48389662,
       0.36858847, 0.49324056, 0.36799205, 0.43996024, 0.35666004,
       0.43280318, 0.35168986, 0.3944334 , 0.3194831 , 0.39781312,
       0.31868787, 0.38230616, 0.32127237, 0.37932406, 0.30159046,
              nan,        nan,        nan,        nan, 0.51570577,
       0.38628231, 0.51749503, 0.39065606, 0.48111332, 0.37852883,
       0.48469185, 0.37912525, 0.43280318, 0.34055666, 0.43956262,
       0.33956262, 0.39980119, 0.33240557, 0.38389662, 0.31968191,
       0.37554672, 0.32246521, 0.3777336 , 0.31888668,        nan,
              nan,        nan,        nan, 0.67654076, 0.52803181,
       0.68449304, 0.54194831, 0.59761431, 0.47137177, 0.60318091,
       0.46421471, 0.54970179, 0.43379722, 0.54055666, 0.43359841,
       0.49025845, 0.39701789, 0.49244533, 0.40337972, 0.45924453,
       0.37813121, 0.46481113, 0.37852883,        nan,        nan,
              nan,        nan, 0.66600398, 0.52803181, 0.67236581,
       0.52683897, 0.60099404, 0.45805169, 0.60119284, 0.47455268,
       0.53777336, 0.42286282, 0.54751491, 0.4306163 , 0.48151093,
       0.39502982, 0.47514911, 0.3888668 , 0.45924453, 0.38787276,
       0.46779324, 0.38528827,        nan,        nan,        nan,
              nan, 0.75944334, 0.61252485, 0.77693837, 0.61809145,
       0.67316103, 0.55149105, 0.68091451, 0.57514911, 0.60994036,
       0.50675944, 0.61749503, 0.51192843, 0.54294235, 0.45407555,
       0.54194831, 0.4471173 , 0.52564612, 0.42664016, 0.52862823,
       0.42743539,        nan,        nan,        nan,        nan,
       0.75109344, 0.61908549, 0.76143141, 0.63081511, 0.67554672,
       0.55506958, 0.69602386, 0.56640159, 0.61491054, 0.49801193,
       0.61510934, 0.51868787, 0.54791252, 0.44214712, 0.54572565,
       0.44811133, 0.52524851, 0.43300199, 0.51610338, 0.43956262,
              nan,        nan,        nan,        nan, 0.80974155,
       0.68091451, 0.82485089, 0.69741551, 0.72524851, 0.60695825,
       0.73260437, 0.60894632, 0.66043738, 0.55586481, 0.66083499,
       0.5582505 , 0.58767396, 0.49304175, 0.59204771, 0.49284294,
       0.57375746, 0.46461233, 0.55487078, 0.46341948,        nan,
              nan,        nan,        nan, 0.81093439, 0.70417495,
       0.82027833, 0.69880716, 0.71888668, 0.60656064, 0.72703777,
       0.61610338, 0.65328032, 0.54413519, 0.66421471, 0.56182903,
       0.59304175, 0.48846918, 0.59105368, 0.4972167 , 0.56222664,
       0.47574553, 0.57316103, 0.4638171 ]), 'split2_test_score': array([       nan,        nan,        nan,        nan, 0.94214712,
       0.94314115, 0.94115308, 0.94333996, 0.87574553, 0.87316103,
       0.8777336 , 0.87594433, 0.79860835, 0.79960239, 0.80377734,
       0.80079523, 0.7361829 , 0.71988072, 0.72922465, 0.71769384,
       0.70258449, 0.68230616, 0.71033797, 0.68747515,        nan,
              nan,        nan,        nan, 0.94274354, 0.9417495 ,
       0.94214712, 0.94254473, 0.87833002, 0.88031809, 0.87614314,
       0.87753479, 0.80735586, 0.79761431, 0.80337972, 0.80059642,
       0.7359841 , 0.7139165 , 0.73836978, 0.71749503, 0.70755467,
       0.69005964, 0.71252485, 0.68648111,        nan,        nan,
              nan,        nan, 0.29562624, 0.24035785, 0.31252485,
       0.25248509, 0.27892644, 0.23280318, 0.30576541, 0.22962227,
       0.25745527, 0.22186879, 0.27395626, 0.22246521, 0.26858847,
       0.22147117, 0.25109344, 0.22166998, 0.25208748, 0.2166998 ,
       0.25884692, 0.21292247,        nan,        nan,        nan,
              nan, 0.30357853, 0.25407555, 0.29781312, 0.24373757,
       0.27932406, 0.22544732, 0.29125249, 0.227833  , 0.2638171 ,
       0.22405567, 0.29005964, 0.22405567, 0.25467197, 0.21630219,
       0.25765408, 0.22743539, 0.24910537, 0.22664016, 0.25526839,
       0.21709742,        nan,        nan,        nan,        nan,
       0.53220676, 0.39681909, 0.54333996, 0.3777336 , 0.47833002,
       0.38031809, 0.49363817, 0.36003976, 0.45447316, 0.33697813,
       0.44274354, 0.3445328 , 0.40218688, 0.32147117, 0.41968191,
       0.32246521, 0.39204771, 0.31789264, 0.39363817, 0.32127237,
              nan,        nan,        nan,        nan, 0.54075547,
       0.39463221, 0.5471173 , 0.40994036, 0.48986083, 0.38071571,
       0.49960239, 0.36222664, 0.4610338 , 0.34393638, 0.45447316,
       0.35228628, 0.41510934, 0.34194831, 0.39662028, 0.3333996 ,
       0.38230616, 0.32087475, 0.36719682, 0.31530815,        nan,
              nan,        nan,        nan, 0.67236581, 0.51570577,
       0.67435388, 0.53737575, 0.59880716, 0.49264414, 0.60258449,
       0.47594433, 0.56421471, 0.43240557, 0.56858847, 0.44751491,
       0.49502982, 0.41133201, 0.49483101, 0.39860835, 0.48409543,
       0.38727634, 0.47932406, 0.37554672,        nan,        nan,
              nan,        nan, 0.67017893, 0.51053678, 0.69005964,
       0.5361829 , 0.60695825, 0.47833002, 0.60636183, 0.47992048,
       0.56580517, 0.44751491, 0.5528827 , 0.45168986, 0.48966203,
       0.39483101, 0.48986083, 0.41510934, 0.47057654, 0.37932406,
       0.47992048, 0.38131213,        nan,        nan,        nan,
              nan, 0.77276342, 0.62345924, 0.77316103, 0.62842942,
       0.66998012, 0.5528827 , 0.68369781, 0.55168986, 0.61610338,
       0.51292247, 0.61033797, 0.51451292, 0.55049702, 0.45904573,
       0.55387674, 0.45387674, 0.51908549, 0.44314115, 0.53817097,
       0.43658052,        nan,        nan,        nan,        nan,
       0.75785288, 0.60218688, 0.77833002, 0.63280318, 0.6777336 ,
       0.54652087, 0.68031809, 0.55964215, 0.61829026, 0.5083499 ,
       0.62425447, 0.51709742, 0.54393638, 0.44353877, 0.55467197,
       0.455666  , 0.54095427, 0.43101392, 0.52445328, 0.44115308,
              nan,        nan,        nan,        nan, 0.81172962,
       0.68011928, 0.83220676, 0.70417495, 0.71312127, 0.60695825,
       0.71749503, 0.61471173, 0.64870775, 0.54970179, 0.6528827 ,
       0.56242545, 0.58588469, 0.4805169 , 0.59145129, 0.47912525,
       0.56421471, 0.47673956, 0.57157058, 0.48230616,        nan,
              nan,        nan,        nan, 0.81530815, 0.69801193,
       0.82743539, 0.70238569, 0.71431412, 0.60337972, 0.72166998,
       0.61530815, 0.65029821, 0.55666004, 0.65705765, 0.55367793,
       0.58568588, 0.49622266, 0.59542744, 0.49304175, 0.56182903,
       0.47296223, 0.56719682, 0.48349901]), 'split3_test_score': array([       nan,        nan,        nan,        nan, 0.94353877,
       0.94314115, 0.94393638, 0.94254473, 0.8749503 , 0.87375746,
       0.87992048, 0.87693837, 0.81471173, 0.80218688, 0.81510934,
       0.81153082, 0.7473161 , 0.72564612, 0.7471173 , 0.73200795,
       0.71709742, 0.68866799, 0.72524851, 0.70457256,        nan,
              nan,        nan,        nan, 0.93996024, 0.94393638,
       0.94393638, 0.94314115, 0.87514911, 0.87097416, 0.87335984,
       0.87316103, 0.81829026, 0.80357853, 0.81789264, 0.81153082,
       0.74950298, 0.72763419, 0.74493042, 0.73399602, 0.72743539,
       0.69662028, 0.73180915, 0.69801193,        nan,        nan,
              nan,        nan, 0.28588469, 0.23777336, 0.29244533,
       0.24314115, 0.27534791, 0.227833  , 0.27137177, 0.23200795,
       0.26182903, 0.23101392, 0.26421471, 0.22624254, 0.24910537,
       0.20198807, 0.26600398, 0.2139165 , 0.2417495 , 0.22206759,
       0.24512922, 0.21630219,        nan,        nan,        nan,
              nan, 0.28588469, 0.23658052, 0.27872763, 0.23439364,
       0.28011928, 0.23638171, 0.2693837 , 0.23697813, 0.25685885,
       0.22326044, 0.26322068, 0.23717694, 0.24950298, 0.22246521,
       0.25109344, 0.23976143, 0.23996024, 0.21888668, 0.24671968,
       0.22246521,        nan,        nan,        nan,        nan,
       0.50854871, 0.39681909, 0.53220676, 0.40715706, 0.47435388,
       0.37117296, 0.4777336 , 0.36640159, 0.45308151, 0.36481113,
       0.45506958, 0.3584493 , 0.3916501 , 0.3194831 , 0.39542744,
       0.33001988, 0.37335984, 0.31212724, 0.39304175, 0.31749503,
              nan,        nan,        nan,        nan, 0.52584493,
       0.39840954, 0.53021869, 0.40337972, 0.47614314, 0.37017893,
       0.47673956, 0.37117296, 0.43757455, 0.3500994 , 0.455666  ,
       0.35984095, 0.3860835 , 0.32723658, 0.41153082, 0.32524851,
       0.38667992, 0.30974155, 0.38131213, 0.32087475,        nan,
              nan,        nan,        nan, 0.66898608, 0.50735586,
       0.68151093, 0.51212724, 0.6166998 , 0.47554672, 0.60755467,
       0.47813121, 0.54831014, 0.42683897, 0.56699801, 0.42882704,
       0.49085487, 0.39483101, 0.49781312, 0.40258449, 0.46023857,
       0.37296223, 0.47037773, 0.39363817,        nan,        nan,
              nan,        nan, 0.67952286, 0.51431412, 0.67554672,
       0.51212724, 0.59383698, 0.46063618, 0.61709742, 0.46978131,
       0.54910537, 0.43856859, 0.54095427, 0.45447316, 0.50298211,
       0.41471173, 0.48807157, 0.41073559, 0.45069583, 0.38131213,
       0.47852883, 0.38031809,        nan,        nan,        nan,
              nan, 0.76361829, 0.60258449, 0.76719682, 0.61212724,
       0.67952286, 0.54373757, 0.67673956, 0.54612326, 0.62067594,
       0.50039761, 0.61968191, 0.49860835, 0.54791252, 0.4554672 ,
       0.54294235, 0.4417495 , 0.51371769, 0.42803181, 0.53220676,
       0.42723658,        nan,        nan,        nan,        nan,
       0.76242545, 0.59880716, 0.77276342, 0.62266402, 0.67236581,
       0.56461233, 0.67594433, 0.5554672 , 0.61093439, 0.51053678,
       0.6166998 , 0.48986083, 0.54930417, 0.4473161 , 0.55208748,
       0.45149105, 0.52445328, 0.43081511, 0.52882704, 0.44254473,
              nan,        nan,        nan,        nan, 0.80218688,
       0.69343936, 0.82365805, 0.69801193, 0.71709742, 0.61272366,
       0.7194831 , 0.6       , 0.6612326 , 0.55685885, 0.66222664,
       0.57117296, 0.60119284, 0.48190855, 0.59662028, 0.49324056,
       0.58071571, 0.46481113, 0.57594433, 0.4805169 ,        nan,
              nan,        nan,        nan, 0.81411531, 0.69025845,
       0.82485089, 0.70815109, 0.71729622, 0.58866799, 0.71888668,
       0.61928429, 0.65109344, 0.55407555, 0.66540755, 0.56620278,
       0.58846918, 0.49284294, 0.60417495, 0.50854871, 0.5554672 ,
       0.47117296, 0.56978131, 0.47196819]), 'split4_test_score': array([       nan,        nan,        nan,        nan, 0.93736329,
       0.93676675, 0.93835753, 0.93835753, 0.87751044, 0.87751044,
       0.87611851, 0.87830583, 0.82342414, 0.80890833, 0.81904951,
       0.81427719, 0.74885663, 0.7265858 , 0.74468085, 0.72758004,
       0.7265858 , 0.6967588 , 0.72221118, 0.69576457,        nan,
              nan,        nan,        nan, 0.9369656 , 0.93955061,
       0.93776099, 0.93736329, 0.87611851, 0.87910121, 0.87273812,
       0.87492543, 0.81805528, 0.81487373, 0.81944721, 0.81288527,
       0.74647047, 0.73275005, 0.74925432, 0.72479618, 0.71346192,
       0.69636111, 0.7236031 , 0.69755419,        nan,        nan,
              nan,        nan, 0.29926427, 0.23801949, 0.28932193,
       0.23762179, 0.27739113, 0.22787831, 0.28614039, 0.25034798,
       0.27261881, 0.23642871, 0.28514615, 0.22807715, 0.26188109,
       0.22608869, 0.2640684 , 0.2163452 , 0.24816067, 0.21515212,
       0.24279181, 0.2163452 ,        nan,        nan,        nan,
              nan, 0.30085504, 0.24676874, 0.31159276, 0.24796182,
       0.29190694, 0.23841718, 0.27321535, 0.23881487, 0.28335653,
       0.22927023, 0.28116922, 0.22728177, 0.24060449, 0.22807715,
       0.25770531, 0.22071983, 0.27102804, 0.21773712, 0.25352953,
       0.21554981,        nan,        nan,        nan,        nan,
       0.53032412, 0.39033605, 0.53788029, 0.39113144, 0.48299861,
       0.3875522 , 0.48777093, 0.36369059, 0.44084311, 0.33624975,
       0.46609664, 0.34937363, 0.42215152, 0.32968781, 0.39192682,
       0.33446013, 0.38337642, 0.32073971, 0.39431298, 0.3275005 ,
              nan,        nan,        nan,        nan, 0.53609067,
       0.3811891 , 0.54066415, 0.40485186, 0.49075363, 0.3931199 ,
       0.49135017, 0.37184331, 0.44919467, 0.34499901, 0.43885464,
       0.35295287, 0.41300457, 0.32869358, 0.42075959, 0.33286936,
       0.3965003 , 0.32173394, 0.39113144, 0.32929012,        nan,
              nan,        nan,        nan, 0.6814476 , 0.53927222,
       0.6758799 , 0.53668721, 0.61801551, 0.48956055, 0.6128455 ,
       0.48896401, 0.55756612, 0.44362696, 0.55756612, 0.44561543,
       0.50188904, 0.40644263, 0.49095248, 0.39729569, 0.47424935,
       0.39928415, 0.47922052, 0.38178564,        nan,        nan,
              nan,        nan, 0.68244184, 0.52157487, 0.69437264,
       0.53072181, 0.61304434, 0.48101014, 0.61861205, 0.46748857,
       0.56213959, 0.44601312, 0.55100418, 0.43925234, 0.49592364,
       0.40564725, 0.49910519, 0.40186916, 0.47941937, 0.38178564,
       0.4714655 , 0.3875522 ,        nan,        nan,        nan,
              nan, 0.76555975, 0.61145357, 0.77669517, 0.63113939,
       0.67269835, 0.57605886, 0.68940147, 0.55219726, 0.62915092,
       0.50705906, 0.62537284, 0.52813681, 0.55995228, 0.45933585,
       0.56054882, 0.45217737, 0.54722609, 0.44223504, 0.53549413,
       0.43965003,        nan,        nan,        nan,        nan,
       0.76158282, 0.63094054, 0.77729171, 0.64088288, 0.67369258,
       0.55856035, 0.69417379, 0.56989461, 0.62258898, 0.5106383 ,
       0.61821436, 0.52058063, 0.56353152, 0.4625174 , 0.56949692,
       0.45535892, 0.52396103, 0.44402466, 0.54026645, 0.4412408 ,
              nan,        nan,        nan,        nan, 0.81964605,
       0.67906144, 0.84092265, 0.70491151, 0.72618811, 0.63094054,
       0.73195466, 0.63014516, 0.66255717, 0.55895804, 0.66931796,
       0.57685425, 0.59335852, 0.5076556 , 0.59673891, 0.51043945,
       0.58242195, 0.48538477, 0.58103003, 0.49154902,        nan,
              nan,        nan,        nan, 0.81825413, 0.69695765,
       0.82859415, 0.69298071, 0.72678465, 0.61662358, 0.73215351,
       0.62994631, 0.65679061, 0.56969576, 0.6635514 , 0.56452575,
       0.58719427, 0.50964406, 0.59395506, 0.49890634, 0.57088884,
       0.5012925 , 0.57148538, 0.4957248 ]), 'mean_test_score': array([       nan,        nan,        nan,        nan, 0.93952037,
       0.94015653, 0.93991803, 0.94039516, 0.87808658, 0.87629732,
       0.87864319, 0.87776852, 0.81299497, 0.80396855, 0.81243813,
       0.80639421, 0.74165999, 0.72110245, 0.73851868, 0.72285199,
       0.71470086, 0.68873546, 0.71732494, 0.69187657,        nan,
              nan,        nan,        nan, 0.93924203, 0.94027593,
       0.93999753, 0.93983846, 0.87884199, 0.87872283, 0.87729116,
       0.87725149, 0.81422736, 0.8045652 , 0.81287552, 0.80635439,
       0.74138157, 0.72281244, 0.73939361, 0.72320973, 0.7147401 ,
       0.69024638, 0.71589358, 0.69024642,        nan,        nan,
              nan,        nan, 0.29488267, 0.23758402, 0.29782463,
       0.24537724, 0.282516  , 0.23654982, 0.28951436, 0.23670976,
       0.26848002, 0.23356805, 0.2755183 , 0.22645042, 0.25889709,
       0.22012827, 0.2589767 , 0.22239429, 0.24963213, 0.22020736,
       0.25380687, 0.21857719,        nan,        nan,        nan,
              nan, 0.29846127, 0.24509928, 0.29611577, 0.24112318,
       0.27663188, 0.23456216, 0.28136275, 0.23531765, 0.26875878,
       0.22772283, 0.27599528, 0.23177842, 0.24839923, 0.22215619,
       0.25615339, 0.229472  , 0.25368871, 0.2227522 , 0.25428443,
       0.22076404,        nan,        nan,        nan,        nan,
       0.52996144, 0.39083063, 0.53747666, 0.39413086, 0.48308083,
       0.37504523, 0.48872715, 0.36713176, 0.44315868, 0.34891993,
       0.44840819, 0.35337373, 0.40200486, 0.32001311, 0.4004132 ,
       0.32728964, 0.38478662, 0.31834277, 0.38951866, 0.32112634,
              nan,        nan,        nan,        nan, 0.53365949,
       0.39067122, 0.53437537, 0.40065228, 0.48677896, 0.3797373 ,
       0.48733564, 0.37079013, 0.4478111 , 0.34442724, 0.44753236,
       0.35114723, 0.4034359 , 0.33094746, 0.40204456, 0.32959574,
       0.38661616, 0.31631498, 0.37937937, 0.32136498,        nan,
              nan,        nan,        nan, 0.67704499, 0.52045882,
       0.68197518, 0.5294448 , 0.60598879, 0.47954233, 0.60841403,
       0.47759399, 0.55389891, 0.4357234 , 0.55767625, 0.43862607,
       0.49385693, 0.40001616, 0.49449268, 0.39862415, 0.47150991,
       0.38490653, 0.47353794, 0.3839118 ,        nan,        nan,
              nan,        nan, 0.67533529, 0.51775434, 0.68471946,
       0.52733721, 0.60451742, 0.4698402 , 0.61398086, 0.47695696,
       0.55334243, 0.43715491, 0.54928632, 0.44506717, 0.49170958,
       0.40283919, 0.48789261, 0.40101001, 0.46530733, 0.3839118 ,
       0.4742931 , 0.38542296,        nan,        nan,        nan,
              nan, 0.76472229, 0.61576984, 0.77394738, 0.6257905 ,
       0.67537466, 0.55533106, 0.68376499, 0.5563639 , 0.61763933,
       0.50582533, 0.62113819, 0.51286394, 0.5488493 , 0.45886518,
       0.55210976, 0.45091261, 0.52701977, 0.43528597, 0.53226781,
       0.43325804,        nan,        nan,        nan,        nan,
       0.76030861, 0.61223185, 0.77311242, 0.6310394 , 0.67553375,
       0.55127469, 0.68857631, 0.56292524, 0.61608837, 0.50721712,
       0.62066076, 0.51310221, 0.54952539, 0.44872614, 0.55473437,
       0.4549684 , 0.52920572, 0.43667769, 0.52709902, 0.44164776,
              nan,        nan,        nan,        nan, 0.8120803 ,
       0.68654788, 0.82897976, 0.70312942, 0.7223748 , 0.61298731,
       0.72690783, 0.61596939, 0.65684543, 0.55652322, 0.6608616 ,
       0.56427741, 0.59183274, 0.49083529, 0.59354261, 0.49119326,
       0.57425775, 0.47310081, 0.5711563 , 0.48029787,        nan,
              nan,        nan,        nan, 0.81410808, 0.69573348,
       0.82528146, 0.70145897, 0.719512  , 0.60646587, 0.72722593,
       0.61950815, 0.65398238, 0.55477414, 0.66125899, 0.55914769,
       0.58904919, 0.49334034, 0.5940594 , 0.49950294, 0.56220958,
       0.4807754 , 0.57063903, 0.47759426]), 'std_test_score': array([       nan,        nan,        nan,        nan, 0.00385517,
       0.0039061 , 0.00437535, 0.00352518, 0.00271355, 0.00236788,
       0.00180101, 0.00114597, 0.00801893, 0.00308052, 0.00509749,
       0.00618162, 0.00533379, 0.00435534, 0.00709211, 0.00618222,
       0.00780847, 0.0046579 , 0.00555031, 0.00756974,        nan,
              nan,        nan,        nan, 0.00335818, 0.00372745,
       0.00380984, 0.0037582 , 0.0030523 , 0.00399749, 0.00412698,
       0.00312844, 0.00406867, 0.0056503 , 0.00574744, 0.0049308 ,
       0.00563279, 0.00721318, 0.00693029, 0.00589915, 0.00670804,
       0.00637378, 0.01029526, 0.00639015,        nan,        nan,
              nan,        nan, 0.00497409, 0.00255681, 0.0079866 ,
       0.00522391, 0.01385948, 0.00898946, 0.01112362, 0.00908942,
       0.00871605, 0.00711361, 0.006871  , 0.00393386, 0.0067598 ,
       0.00931484, 0.00565303, 0.00809611, 0.00566756, 0.00361613,
       0.00811671, 0.00500766,        nan,        nan,        nan,
              nan, 0.00657687, 0.00562515, 0.01142809, 0.00622861,
       0.0105955 , 0.00463353, 0.01178185, 0.00516712, 0.00881788,
       0.00575561, 0.01123124, 0.00923479, 0.00545637, 0.00379169,
       0.00380254, 0.00622703, 0.01034226, 0.00397632, 0.00508575,
       0.00460088,        nan,        nan,        nan,        nan,
       0.01132919, 0.00525298, 0.00427426, 0.0109316 , 0.00723626,
       0.00769969, 0.00587603, 0.00585891, 0.00989236, 0.01110443,
       0.01133444, 0.00651229, 0.01073537, 0.00628885, 0.00985025,
       0.00580628, 0.00716017, 0.00331532, 0.00568025, 0.01193827,
              nan,        nan,        nan,        nan, 0.0118612 ,
       0.00615566, 0.01008122, 0.00707026, 0.0071552 , 0.00756032,
       0.00768253, 0.00539484, 0.01113508, 0.00320208, 0.00715017,
       0.00654744, 0.01041356, 0.0060691 , 0.01280969, 0.00622852,
       0.00732544, 0.00667084, 0.00765494, 0.00462787,        nan,
              nan,        nan,        nan, 0.00607648, 0.01164767,
       0.00689663, 0.0116525 , 0.00930216, 0.00974404, 0.00525078,
       0.00801296, 0.00610459, 0.00624287, 0.01008058, 0.00707484,
       0.00435052, 0.0077019 , 0.00251335, 0.00434526, 0.01010758,
       0.0089977 , 0.00551525, 0.00686833,        nan,        nan,
              nan,        nan, 0.00619514, 0.00626077, 0.00895694,
       0.00816565, 0.00656695, 0.00919025, 0.00908847, 0.00910411,
       0.0099516 , 0.00929788, 0.0047198 , 0.00886482, 0.00725993,
       0.0074196 , 0.00764728, 0.01094391, 0.00978611, 0.00391576,
       0.00447525, 0.00446443,        nan,        nan,        nan,
              nan, 0.00448722, 0.00930194, 0.00363093, 0.00959596,
       0.00438236, 0.01088935, 0.00464334, 0.00997046, 0.00680591,
       0.00440338, 0.00756363, 0.00941352, 0.00627141, 0.00427875,
       0.00830497, 0.00608206, 0.01145302, 0.00690749, 0.00419474,
       0.00503176,        nan,        nan,        nan,        nan,
       0.00575518, 0.01168324, 0.00613499, 0.00598745, 0.00228405,
       0.01144255, 0.00867305, 0.00504273, 0.00401689, 0.00470141,
       0.00520349, 0.01167448, 0.00739453, 0.00724924, 0.0079419 ,
       0.00538971, 0.00645982, 0.00625523, 0.00782105, 0.00140925,
              nan,        nan,        nan,        nan, 0.00609729,
       0.00820729, 0.0067989 , 0.00504315, 0.00628975, 0.00923905,
       0.00691068, 0.01103061, 0.00568916, 0.00387775, 0.00529781,
       0.00871652, 0.00535203, 0.00973648, 0.00258923, 0.01132462,
       0.00672472, 0.00781845, 0.00880078, 0.00924239,        nan,
              nan,        nan,        nan, 0.00258662, 0.00547308,
       0.00285766, 0.0052323 , 0.0041424 , 0.0104164 , 0.00645843,
       0.00538605, 0.00317022, 0.00860009, 0.00389474, 0.0064626 ,
       0.00261878, 0.00988956, 0.00605569, 0.00508418, 0.0049681 ,
       0.01098383, 0.00202628, 0.01101364]), 'rank_test_score': array([288, 241, 242, 243,   7,   3,   5,   1,  12,  16,  11,  13,  21,
        28,  23,  25,  33,  43,  36,  40,  48,  55,  45,  52, 244, 245,
       246, 255,   8,   2,   4,   6,   9,  10,  14,  15,  19,  27,  22,
        26,  34,  41,  35,  39,  47,  54,  46,  53, 247, 248, 249, 250,
       204, 224, 202, 221, 206, 226, 205, 225, 212, 229, 210, 233, 214,
       239, 213, 235, 219, 238, 217, 240, 272, 258, 286, 285, 201, 222,
       203, 223, 208, 228, 207, 227, 211, 232, 209, 230, 220, 236, 215,
       231, 218, 234, 216, 237, 280, 279, 278, 287, 113, 175, 109, 174,
       136, 186, 132, 188, 157, 191, 153, 189, 168, 198, 171, 195, 181,
       199, 177, 197, 273, 271, 270, 269, 111, 176, 110, 170, 135, 184,
       134, 187, 154, 192, 155, 190, 165, 193, 167, 194, 178, 200, 185,
       196, 268, 267, 266, 265,  61, 119,  60, 114,  83, 139,  81, 141,
       102, 162,  96, 159, 127, 172, 126, 173, 146, 180, 144, 182, 264,
       263, 262, 261,  64, 120,  58, 116,  84, 147,  78, 142, 103, 160,
       107, 156, 129, 166, 133, 169, 148, 182, 143, 179, 260, 259, 256,
       282,  31,  77,  29,  70,  63,  99,  59,  98,  74, 124,  71, 122,
       108, 149, 104, 151, 118, 163, 112, 164, 274, 275, 276, 277,  32,
        80,  30,  69,  62, 105,  56,  93,  75, 123,  72, 121, 106, 152,
       101, 150, 115, 161, 117, 158, 281, 257, 283, 284,  24,  57,  17,
        49,  42,  79,  38,  76,  67,  97,  66,  92,  87, 131,  86, 130,
        89, 145,  90, 138, 254, 253, 252, 251,  20,  51,  18,  50,  44,
        82,  37,  73,  68, 100,  65,  95,  88, 128,  85, 125,  94, 137,
        91, 140], dtype=int32)}
In [ ]:
print("Best score of Random Forest Hyper Tuning using GridSearchCV: %0.3f" % RF_CV_Fit.best_score_)
Best score of Random Forest Hyper Tuning using GridSearchCV: 0.940
In [ ]:
RF_CV_Fit.get_params()
Out[ ]:
{'cv': 5,
 'error_score': nan,
 'estimator': Pipeline(steps=[('vect', CountVectorizer()), ('tfidf', TfidfTransformer()),
                 ('clf', RandomForestClassifier())]),
 'estimator__clf': RandomForestClassifier(),
 'estimator__clf__bootstrap': True,
 'estimator__clf__ccp_alpha': 0.0,
 'estimator__clf__class_weight': None,
 'estimator__clf__criterion': 'gini',
 'estimator__clf__max_depth': None,
 'estimator__clf__max_features': 'auto',
 'estimator__clf__max_leaf_nodes': None,
 'estimator__clf__max_samples': None,
 'estimator__clf__min_impurity_decrease': 0.0,
 'estimator__clf__min_samples_leaf': 1,
 'estimator__clf__min_samples_split': 2,
 'estimator__clf__min_weight_fraction_leaf': 0.0,
 'estimator__clf__n_estimators': 100,
 'estimator__clf__n_jobs': None,
 'estimator__clf__oob_score': False,
 'estimator__clf__random_state': None,
 'estimator__clf__verbose': 0,
 'estimator__clf__warm_start': False,
 'estimator__memory': None,
 'estimator__steps': [('vect', CountVectorizer()),
  ('tfidf', TfidfTransformer()),
  ('clf', RandomForestClassifier())],
 'estimator__tfidf': TfidfTransformer(),
 'estimator__tfidf__norm': 'l2',
 'estimator__tfidf__smooth_idf': True,
 'estimator__tfidf__sublinear_tf': False,
 'estimator__tfidf__use_idf': True,
 'estimator__vect': CountVectorizer(),
 'estimator__vect__analyzer': 'word',
 'estimator__vect__binary': False,
 'estimator__vect__decode_error': 'strict',
 'estimator__vect__dtype': numpy.int64,
 'estimator__vect__encoding': 'utf-8',
 'estimator__vect__input': 'content',
 'estimator__vect__lowercase': True,
 'estimator__vect__max_df': 1.0,
 'estimator__vect__max_features': None,
 'estimator__vect__min_df': 1,
 'estimator__vect__ngram_range': (1, 1),
 'estimator__vect__preprocessor': None,
 'estimator__vect__stop_words': None,
 'estimator__vect__strip_accents': None,
 'estimator__vect__token_pattern': '(?u)\\b\\w\\w+\\b',
 'estimator__vect__tokenizer': None,
 'estimator__vect__vocabulary': None,
 'estimator__verbose': False,
 'n_jobs': -1,
 'param_grid': {'clf__bootstrap': [True],
  'clf__max_depth': [None, 10, 20, 30, 40, 50],
  'clf__max_features': ['auto', 'sqrt'],
  'clf__min_samples_leaf': [None, 1, 2, 4, 8, 10],
  'clf__n_estimators': [100],
  'tfidf__use_idf': (True, False),
  'vect__ngram_range': [(1, 1), (1, 2)]},
 'pre_dispatch': '2*n_jobs',
 'refit': True,
 'return_train_score': False,
 'scoring': None,
 'verbose': 1}

12. Modeling - DL model

In [ ]:
# Load the dataset 
#dataset2_DL = df_DL[df_DL['pred_group'].isna()]
dataset2_DL = pd.read_excel('/content/sample_data/df_DL_Aug.xlsx')
In [ ]:
dataset2_DL.head()
Out[ ]:
Unnamed: 0 Short description Description Caller Assignment group New Assignment Group combined_description pred_group Language
0 0 login issue verified user details. employee and manager na... spxjnwir pjlcoqds GRP_0 GRP_0 login issue verified user details employee an... NaN en
1 1 outlook team my meetings/skype meetings etc are not ap... hmjdrvpb komuaywn GRP_0 GRP_0 outlook team my meetings/skype meetings etc ar... NaN en
2 2 cant log in to vpn hi i cannot log on to vpn best eylqgodm ybqkwiam GRP_0 GRP_0 cant log in to vpn hi i cannot log on to vpn best NaN en
3 3 unable to access hr tool page unable to access hr tool page xbkucsvz gcpydteq GRP_0 GRP_0 unable to access hr tool page unable to access... NaN en
4 4 skype error skype error owlgqjme qhcozdfx GRP_0 GRP_0 skype error skype error NaN no
In [ ]:
dataset2_DL.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7860 entries, 0 to 7859
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Unnamed: 0            7860 non-null   int64  
 1   Short description     7860 non-null   object 
 2   Description           7860 non-null   object 
 3   Caller                7860 non-null   object 
 4   Assignment group      7860 non-null   object 
 5   New Assignment Group  7860 non-null   object 
 6   combined_description  7860 non-null   object 
 7   pred_group            0 non-null      float64
 8   Language              7860 non-null   object 
dtypes: float64(1), int64(1), object(7)
memory usage: 552.8+ KB
In [ ]:
dataset2_DL = dataset2_DL[dataset2_DL['pred_group'].isna()]
In [ ]:
dataset2_DL.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7860 entries, 0 to 7859
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Unnamed: 0            7860 non-null   int64  
 1   Short description     7860 non-null   object 
 2   Description           7860 non-null   object 
 3   Caller                7860 non-null   object 
 4   Assignment group      7860 non-null   object 
 5   New Assignment Group  7860 non-null   object 
 6   combined_description  7860 non-null   object 
 7   pred_group            0 non-null      float64
 8   Language              7860 non-null   object 
dtypes: float64(1), int64(1), object(7)
memory usage: 552.8+ KB
In [ ]:
dataset2_DL.shape
Out[ ]:
(7860, 9)
In [ ]:
# Create a column for target (assignment group) variable
dataset2_DL['group'] = dataset2_DL['New Assignment Group'].str[4:]
#from sklearn. preprocessing import LabelEncoder
#le = LabelEncoder()
#dataset2['group'] = le. fit_transform(dataset2['group'])
dataset2_DL['group'] = dataset2_DL['group'].astype('int8')
dataset2_DL['target'] = dataset2_DL['group'].astype('category').cat.codes

dataset2_DL.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7860 entries, 0 to 7859
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Unnamed: 0            7860 non-null   int64  
 1   Short description     7860 non-null   object 
 2   Description           7860 non-null   object 
 3   Caller                7860 non-null   object 
 4   Assignment group      7860 non-null   object 
 5   New Assignment Group  7860 non-null   object 
 6   combined_description  7860 non-null   object 
 7   pred_group            0 non-null      float64
 8   Language              7860 non-null   object 
 9   group                 7860 non-null   int8   
 10  target                7860 non-null   int8   
dtypes: float64(1), int64(1), int8(2), object(7)
memory usage: 568.1+ KB
In [ ]:
dataset2_DL.groupby(["group", "target"]).size()
Out[ ]:
group  target
0      0         3660
1      1           31
2      2          209
3      3          197
4      4           95
5      5          128
6      6          182
7      7           21
8      8          591
9      9          252
10     10         139
11     11          30
12     12         251
13     13         143
14     14         116
15     15          36
16     16          84
17     17          19
18     18          88
19     19         213
20     20          33
21     21          13
22     22          27
24     23         285
25     24         115
26     25          55
27     26          17
28     27          44
29     28          97
30     29          39
31     30          67
33     31         103
34     32          62
36     33          11
37     34          15
39     35          19
40     36          40
41     37          40
42     38          37
44     39          15
45     40          35
47     41          27
48     42          25
50     43          14
53     44          11
60     45          16
62     46          25
99     47          88
dtype: int64
In [ ]:
dataset2_DL=dataset2_DL.drop(columns=['Unnamed: 0'],axis=1)
In [ ]:
dataset2_DL.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7860 entries, 0 to 7859
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Short description     7860 non-null   object 
 1   Description           7860 non-null   object 
 2   Caller                7860 non-null   object 
 3   Assignment group      7860 non-null   object 
 4   New Assignment Group  7860 non-null   object 
 5   combined_description  7860 non-null   object 
 6   pred_group            0 non-null      float64
 7   Language              7860 non-null   object 
 8   group                 7860 non-null   int8   
 9   target                7860 non-null   int8   
dtypes: float64(1), int8(2), object(7)
memory usage: 506.7+ KB

12.1 Trying DL without augmentation or replacement

In [ ]:
# Create a column for target (assignment group) variable
dataset2_DL_noaug=dataset2_DL.copy()
In [ ]:
max_features = 10000
maxlen = 300
embedding_size = 200
In [ ]:
tokenizer = Tokenizer(num_words = max_features)
tokenizer.fit_on_texts(list(dataset2_DL_noaug['combined_description']))
X = tokenizer.texts_to_sequences(dataset2_DL_noaug['combined_description'])
In [ ]:
X = pad_sequences(X, maxlen = maxlen)
Y = np.asarray(dataset2_DL_noaug['target'])
#print sample headline and lable
print(f'\nsample headline:\n{X[0]}\n\n Label of sample headline: {Y[0]}')
sample headline:
[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0  46  32 300  28 219 227
   7 123  56 424   2  28  56   3 222   7  29   2  15 626   2  28   1  46
   7 107 390 329  49 175  67  85   1  46  32 297]

 Label of sample headline: 0
In [ ]:
tokenizer.word_index
Out[ ]:
{'to': 1,
 'the': 2,
 'in': 3,
 'job': 4,
 'is': 5,
 'not': 6,
 'and': 7,
 'on': 8,
 'for': 9,
 'i': 10,
 'a': 11,
 'at': 12,
 'no': 13,
 'please': 14,
 'password': 15,
 'scheduler': 16,
 'erp': 17,
 'of': 18,
 'failed': 19,
 'this': 20,
 'tool': 21,
 'it': 22,
 'unable': 23,
 'access': 24,
 'company': 25,
 'with': 26,
 'na': 27,
 'user': 28,
 'reset': 29,
 'yes': 30,
 'my': 31,
 'issue': 32,
 'sid': 33,
 'from': 34,
 'you': 35,
 'person': 36,
 'ticket': 37,
 'error': 38,
 'account': 39,
 'hostname': 40,
 'have': 41,
 'outlook': 42,
 'be': 43,
 'can': 44,
 'are': 45,
 'login': 46,
 'email': 47,
 'help': 48,
 'that': 49,
 'working': 50,
 'am': 51,
 'we': 52,
 'need': 53,
 'system': 54,
 'e': 55,
 'name': 56,
 'as': 57,
 'when': 58,
 'id': 59,
 'has': 60,
 'or': 61,
 'network': 62,
 'by': 63,
 'but': 64,
 'message': 65,
 'if': 66,
 'was': 67,
 'printer': 68,
 'update': 69,
 'new': 70,
 'site': 71,
 'server': 72,
 'vpn': 73,
 'an': 74,
 'engineering': 75,
 'x': 76,
 'out': 77,
 'f': 78,
 'skype': 79,
 'c': 80,
 'me': 81,
 'all': 82,
 'power': 83,
 'down': 84,
 'able': 85,
 'does': 86,
 'see': 87,
 'usa': 88,
 'locked': 89,
 'below': 90,
 'event': 91,
 'cannot': 92,
 'open': 93,
 'crm': 94,
 'd': 95,
 'log': 96,
 'mail': 97,
 'device': 98,
 'phone': 99,
 'will': 100,
 'number': 101,
 'vendor': 102,
 'team': 103,
 'plant': 104,
 'your': 105,
 'inside': 106,
 'check': 107,
 'request': 108,
 'up': 109,
 'windows': 110,
 'hi': 111,
 'circuit': 112,
 'what': 113,
 'do': 114,
 'connect': 115,
 'customer': 116,
 'time': 117,
 'get': 118,
 'change': 119,
 'since': 120,
 'pm': 121,
 'problem': 122,
 'manager': 123,
 'pc': 124,
 'best': 125,
 'backup': 126,
 'microsoft': 127,
 'data': 128,
 'work': 129,
 'platform': 130,
 'collaboration': 131,
 'order': 132,
 'unlock': 133,
 'been': 134,
 'connection': 135,
 'information': 136,
 'et': 137,
 'ip': 138,
 'type': 139,
 'computer': 140,
 'telecom': 141,
 'summary': 142,
 'com': 143,
 'any': 144,
 'management': 145,
 'laptop': 146,
 'outage': 147,
 'tcp': 148,
 'contact': 149,
 'call': 150,
 'after': 151,
 'sales': 152,
 'internet': 153,
 'hr': 154,
 'using': 155,
 'production': 156,
 'group': 157,
 'there': 158,
 'b': 159,
 'print': 160,
 'asa': 161,
 'screen': 162,
 'problems': 163,
 'attached': 164,
 'file': 165,
 'report': 166,
 'our': 167,
 'eu': 168,
 'us': 169,
 'office': 170,
 'available': 171,
 'into': 172,
 'following': 173,
 's': 174,
 'he': 175,
 'now': 176,
 'getting': 177,
 'create': 178,
 'delivery': 179,
 'which': 180,
 'also': 181,
 'could': 182,
 'business': 183,
 'via': 184,
 'address': 185,
 'language': 186,
 'use': 187,
 'same': 188,
 '–': 189,
 'only': 190,
 'other': 191,
 'space': 192,
 'germany': 193,
 'explorer': 194,
 'browser': 195,
 'add': 196,
 'users': 197,
 'inc': 198,
 'maintenance': 199,
 'start': 200,
 'dear': 201,
 'folder': 202,
 'one': 203,
 'telephone': 204,
 'src': 205,
 'dst': 206,
 'application': 207,
 'inwarehouse': 208,
 'client': 209,
 'so': 210,
 "can't": 211,
 'issues': 212,
 'portal': 213,
 'scheduled': 214,
 'inplant': 215,
 'mobile': 216,
 'service': 217,
 'source': 218,
 'details': 219,
 'install': 220,
 'ms': 221,
 'ad': 222,
 'uacyltoe': 223,
 'deny': 224,
 'link': 225,
 'acl': 226,
 'employee': 227,
 'outside': 228,
 'blocked': 229,
 'provider': 230,
 'cert': 231,
 'notified': 232,
 'maint': 233,
 'did': 234,
 'software': 235,
 'some': 236,
 'emails': 237,
 'drive': 238,
 'connected': 239,
 'interface': 240,
 'view': 241,
 'would': 242,
 'again': 243,
 'status': 244,
 'back': 245,
 'remote': 246,
 'sep': 247,
 'needs': 248,
 'global': 249,
 'set': 250,
 'mm': 251,
 'possible': 252,
 'port': 253,
 'tried': 254,
 'reporting': 255,
 'know': 256,
 'hxgaycze': 257,
 'trying': 258,
 'note': 259,
 'files': 260,
 'may': 261,
 'ap': 262,
 'location': 263,
 'slow': 264,
 'received': 265,
 'required': 266,
 'over': 267,
 'disk': 268,
 'these': 269,
 'still': 270,
 'how': 271,
 'should': 272,
 'code': 273,
 'like': 274,
 'active': 275,
 're': 276,
 'his': 277,
 'app': 278,
 'let': 279,
 'nwfodmhc': 280,
 'due': 281,
 'setup': 282,
 'exurcwkm': 283,
 'sw': 284,
 'security': 285,
 'host': 286,
 'agent': 287,
 'changed': 288,
 'urgent': 289,
 'evening': 290,
 'then': 291,
 'provide': 292,
 'm': 293,
 'destination': 294,
 'support': 295,
 'g': 296,
 'resolved': 297,
 'high': 298,
 'printing': 299,
 'verified': 300,
 'monitor': 301,
 'more': 302,
 'priority': 303,
 'process': 304,
 'list': 305,
 'sent': 306,
 'today': 307,
 'passwords': 308,
 'kindly': 309,
 'full': 310,
 'gsc': 311,
 'they': 312,
 'started': 313,
 'showing': 314,
 'morning': 315,
 'machine': 316,
 'running': 317,
 'used': 318,
 'through': 319,
 'excel': 320,
 'expense': 321,
 'try': 322,
 'mii': 323,
 'ess': 324,
 'having': 325,
 'total': 326,
 'wifi': 327,
 'per': 328,
 'confirmed': 329,
 'very': 330,
 'notification': 331,
 'find': 332,
 'while': 333,
 'aug': 334,
 'evened': 335,
 'page': 336,
 'additional': 337,
 'send': 338,
 'found': 339,
 'material': 340,
 'missing': 341,
 'september': 342,
 'u': 343,
 'internal': 344,
 'sincerely': 345,
 'being': 346,
 'had': 347,
 'action': 348,
 'date': 349,
 'http': 350,
 'created': 351,
 'resolve': 352,
 'document': 353,
 'orders': 354,
 'just': 355,
 'exe': 356,
 'top': 357,
 'volume': 358,
 'enter': 359,
 'pls': 360,
 'label': 361,
 'october': 362,
 'august': 363,
 'good': 364,
 'apac': 365,
 'consumed': 366,
 'services': 367,
 'alerts': 368,
 'display': 369,
 'look': 370,
 'hana': 371,
 'agents': 372,
 'sign': 373,
 'last': 374,
 'dell': 375,
 'equipment': 376,
 'got': 377,
 'attachment': 378,
 'about': 379,
 'meeting': 380,
 'run': 381,
 'iphone': 382,
 'count': 383,
 'traffic': 384,
 'old': 385,
 'india': 386,
 'vip': 387,
 'dial': 388,
 'audio': 389,
 'caller': 390,
 "i'm": 391,
 'before': 392,
 'o': 393,
 'installation': 394,
 'domain': 395,
 'fix': 396,
 'hello': 397,
 'updated': 398,
 'wrong': 399,
 'events': 400,
 'supply': 401,
 'make': 402,
 'advise': 403,
 'specify': 404,
 'driver': 405,
 'opening': 406,
 'search': 407,
 'prod': 408,
 'delete': 409,
 'payroll': 410,
 'query': 411,
 'correct': 412,
 'diagnostics': 413,
 'ie': 414,
 'external': 415,
 'times': 416,
 'completed': 417,
 'programdnty': 418,
 'verizon': 419,
 'warning': 420,
 'tools': 421,
 'online': 422,
 'receive': 423,
 'checked': 424,
 'product': 425,
 'activation': 426,
 'multiple': 427,
 't': 428,
 'item': 429,
 'form': 430,
 'reports': 431,
 'warehouse': 432,
 'complete': 433,
 'model': 434,
 'purchasing': 435,
 'why': 436,
 'web': 437,
 'pcap': 438,
 'logon': 439,
 'who': 440,
 'r': 441,
 'version': 442,
 'click': 443,
 'blank': 444,
 'certificate': 445,
 'too': 446,
 'their': 447,
 'incident': 448,
 'defective': 449,
 'communication': 450,
 'because': 451,
 'longer': 452,
 'hub': 453,
 'go': 454,
 'where': 455,
 'response': 456,
 'net': 457,
 'teamviewer': 458,
 'related': 459,
 'shows': 460,
 'load': 461,
 'ws': 462,
 'plm': 463,
 'day': 464,
 'field': 465,
 'show': 466,
 'submit': 467,
 'dn': 468,
 'installed': 469,
 'receiving': 470,
 'documents': 471,
 'home': 472,
 'two': 473,
 'sto': 474,
 'want': 475,
 'automatically': 476,
 'switch': 477,
 'than': 478,
 'cold': 479,
 'center': 480,
 'under': 481,
 'po': 482,
 'called': 483,
 'exchange': 484,
 'accounts': 485,
 'them': 486,
 'ewew': 487,
 "it's": 488,
 'currently': 489,
 'scan': 490,
 'screenshot': 491,
 'content': 492,
 'were': 493,
 'wireless': 494,
 'mails': 495,
 'local': 496,
 'personal': 497,
 'review': 498,
 'p': 499,
 'sir': 500,
 'unlocked': 501,
 'sync': 502,
 'finance': 503,
 'needed': 504,
 'incidents': 505,
 'da': 506,
 'questions': 507,
 'n': 508,
 'ticketing': 509,
 'line': 510,
 'default': 511,
 'anymore': 512,
 'another': 513,
 'pdf': 514,
 'without': 515,
 'off': 516,
 'aerp': 517,
 'w': 518,
 'fine': 519,
 'approved': 520,
 'jul': 521,
 'netweaver': 522,
 'ex': 523,
 'save': 524,
 'distribution': 525,
 'media': 526,
 'hard': 527,
 'copy': 528,
 'kind': 529,
 'its': 530,
 'few': 531,
 'impact': 532,
 'analysis': 533,
 'added': 534,
 'chain': 535,
 'bkwin': 536,
 'bobj': 537,
 'k': 538,
 'responding': 539,
 'tax': 540,
 'launch': 541,
 'free': 542,
 'vid': 543,
 'every': 544,
 'friday': 545,
 'alert': 546,
 'download': 547,
 'detail': 548,
 'she': 549,
 'lean': 550,
 'already': 551,
 'assist': 552,
 'bex': 553,
 'even': 554,
 'admin': 555,
 'fe': 556,
 'website': 557,
 'th': 558,
 'select': 559,
 'several': 560,
 'fw': 561,
 'many': 562,
 'during': 563,
 'monday': 564,
 "'": 565,
 'days': 566,
 'until': 567,
 'stock': 568,
 'pp': 569,
 'next': 570,
 'processing': 571,
 'says': 572,
 'java': 573,
 "doesn't": 574,
 'shot': 575,
 'dsw': 576,
 'cost': 577,
 'database': 578,
 'her': 579,
 'handling': 580,
 'take': 581,
 'assign': 582,
 'calls': 583,
 'netch': 584,
 'yesterday': 585,
 '·': 586,
 'desk': 587,
 'co': 588,
 'loading': 589,
 'settings': 590,
 'desktop': 591,
 'deleted': 592,
 'servers': 593,
 'expired': 594,
 'here': 595,
 'daily': 596,
 'transfer': 597,
 'give': 598,
 'changes': 599,
 'refer': 600,
 'price': 601,
 'calculator': 602,
 'floor': 603,
 'activity': 604,
 'gb': 605,
 'sinkhole': 606,
 'explicit': 607,
 'etc': 608,
 'threshold': 609,
 'correctly': 610,
 'end': 611,
 'post': 612,
 'pl': 613,
 'area': 614,
 'guest': 615,
 'room': 616,
 'systems': 617,
 'rule': 618,
 'both': 619,
 'options': 620,
 'applications': 621,
 'failure': 622,
 'workflow': 623,
 'going': 624,
 'h': 625,
 'advised': 626,
 'drawings': 627,
 'further': 628,
 'items': 629,
 'sartlgeo': 630,
 'shop': 631,
 'approval': 632,
 'south': 633,
 'abended': 634,
 'incorrect': 635,
 'sender': 636,
 'updating': 637,
 'gso': 638,
 'denied': 639,
 'output': 640,
 'outbound': 641,
 "don't": 642,
 'bk': 643,
 'assigned': 644,
 'logging': 645,
 'works': 646,
 'bkbackup': 647,
 'lhqksbdx': 648,
 'located': 649,
 'hotf': 650,
 'transaction': 651,
 'duration': 652,
 'inspector': 653,
 'ascii': 654,
 'hex': 655,
 'week': 656,
 'someone': 657,
 'lock': 658,
 'shared': 659,
 'properly': 660,
 'right': 661,
 'either': 662,
 '\u200e': 663,
 'z': 664,
 'attendance': 665,
 'hp': 666,
 'fail': 667,
 'license': 668,
 'well': 669,
 'first': 670,
 'requested': 671,
 'project': 672,
 'connecting': 673,
 'read': 674,
 'win': 675,
 'packet': 676,
 'calendar': 677,
 'investigate': 678,
 'seems': 679,
 'done': 680,
 'mr': 681,
 'batch': 682,
 'correlation': 683,
 'renew': 684,
 'above': 685,
 'different': 686,
 'immediately': 687,
 'enable': 688,
 'dev': 689,
 'description': 690,
 'interaction': 691,
 'inbound': 692,
 'organization': 693,
 'supervisor': 694,
 'boot': 695,
 'each': 696,
 'average': 697,
 'hours': 698,
 'username': 699,
 'sound': 700,
 'etime': 701,
 'lan': 702,
 'tab': 703,
 'de': 704,
 'employees': 705,
 'remove': 706,
 'engineer': 707,
 'appears': 708,
 'sure': 709,
 'attach': 710,
 'function': 711,
 'control': 712,
 'example': 713,
 'pcs': 714,
 'partner': 715,
 'tuesday': 716,
 'importance': 717,
 'calling': 718,
 'ship': 719,
 'samples': 720,
 'month': 721,
 'administrator': 722,
 'restart': 723,
 'www': 724,
 'forward': 725,
 'current': 726,
 'future': 727,
 'opened': 728,
 'seeing': 729,
 'mailbox': 730,
 'wednesday': 731,
 'technical': 732,
 'occurrence': 733,
 'value': 734,
 'allow': 735,
 'sql': 736,
 'however': 737,
 'blue': 738,
 'repair': 739,
 'thank': 740,
 'keep': 741,
 'coming': 742,
 'write': 743,
 'snp': 744,
 'malware': 745,
 'printed': 746,
 'approve': 747,
 'errors': 748,
 'teams': 749,
 'card': 750,
 'reason': 751,
 'solve': 752,
 'must': 753,
 'configuration': 754,
 'upgrade': 755,
 'relay': 756,
 'jobs': 757,
 'mentioned': 758,
 'temporarily': 759,
 'always': 760,
 'recipient': 761,
 'node': 762,
 'thursday': 763,
 'went': 764,
 'quote': 765,
 'asking': 766,
 'dp': 767,
 'attachments': 768,
 'mit': 769,
 'ab': 770,
 'dhcpd': 771,
 'dhcpack': 772,
 'eth': 773,
 'lease': 774,
 'soc': 775,
 "user's": 776,
 'drawing': 777,
 'stopped': 778,
 'director': 779,
 'training': 780,
 'shipping': 781,
 'box': 782,
 'ping': 783,
 'balancing': 784,
 'close': 785,
 'amerirtca': 786,
 'determined': 787,
 'battery': 788,
 'android': 789,
 'df': 790,
 'escalation': 791,
 'nothing': 792,
 'regarding': 793,
 'disconnected': 794,
 'prohibited': 795,
 'org': 796,
 'profile': 797,
 'once': 798,
 'directory': 799,
 'billing': 800,
 'tracker': 801,
 'soon': 802,
 'drivers': 803,
 'handle': 804,
 'spam': 805,
 'netbios': 806,
 'shown': 807,
 'keeps': 808,
 'provided': 809,
 'wu': 810,
 'drives': 811,
 'departments': 812,
 'reboot': 813,
 'gmbh': 814,
 'infected': 815,
 'purposes': 816,
 'critical': 817,
 'devices': 818,
 'option': 819,
 'ef': 820,
 'disabled': 821,
 'monitoring': 822,
 'window': 823,
 'contain': 824,
 'between': 825,
 'welcome': 826,
 'filesys': 827,
 'valid': 828,
 'entered': 829,
 'something': 830,
 'rakthyesh': 831,
 'lhqsm': 832,
 'cvss': 833,
 'udp': 834,
 'protocol': 835,
 'corresponding': 836,
 'indicate': 837,
 'lost': 838,
 'thanks': 839,
 'needful': 840,
 'confirm': 841,
 'confidential': 842,
 'reply': 843,
 'sending': 844,
 'effective': 845,
 'image': 846,
 'return': 847,
 'facing': 848,
 'printers': 849,
 'j': 850,
 'restore': 851,
 'customers': 852,
 'reference': 853,
 'turn': 854,
 'non': 855,
 'share': 856,
 'release': 857,
 'fixed': 858,
 'method': 859,
 'notify': 860,
 'hrp': 861,
 'mouse': 862,
 'messages': 863,
 'german': 864,
 'recently': 865,
 'symantec': 866,
 'financial': 867,
 'latitude': 868,
 'intended': 869,
 'dc': 870,
 'heu': 871,
 'regen': 872,
 'ac': 873,
 'medium': 874,
 'come': 875,
 'button': 876,
 'applicable': 877,
 'strictly': 878,
 'point': 879,
 'owned': 880,
 'long': 881,
 'dns': 882,
 'configure': 883,
 'minutes': 884,
 'performance': 885,
 'bw': 886,
 'changing': 887,
 'directly': 888,
 'dat': 889,
 'vlan': 890,
 'rpc': 891,
 'firewall': 892,
 'tablet': 893,
 'qa': 894,
 'disclosure': 895,
 'within': 896,
 'travel': 897,
 'past': 898,
 'case': 899,
 'scanner': 900,
 'cell': 901,
 'size': 902,
 'replacement': 903,
 'amount': 904,
 'key': 905,
 'info': 906,
 'discount': 907,
 'arc': 908,
 'delegating': 909,
 'instances': 910,
 'session': 911,
 'basis': 912,
 'him': 913,
 'wy': 914,
 'assistance': 915,
 'mistake': 916,
 'detected': 917,
 'word': 918,
 'rerouted': 919,
 'v': 920,
 "won't": 921,
 'sms': 922,
 'url': 923,
 'ok': 924,
 'cc': 925,
 'pricing': 926,
 'fy': 927,
 'ic': 928,
 'dwfiykeo': 929,
 'argtxmvcumar': 930,
 'station': 931,
 'planned': 932,
 'block': 933,
 'termination': 934,
 'creating': 935,
 'ce': 936,
 'concerns': 937,
 'generating': 938,
 'upload': 939,
 'part': 940,
 'single': 941,
 'freezing': 942,
 'starting': 943,
 'people': 944,
 'shipment': 945,
 'msd': 946,
 'manually': 947,
 'happened': 948,
 'extend': 949,
 'em': 950,
 'own': 951,
 'hxgayczeing': 952,
 'reinstall': 953,
 'fürth': 954,
 'logged': 955,
 'numbers': 956,
 'against': 957,
 'mb': 958,
 'queries': 959,
 'accept': 960,
 'directionality': 961,
 'score': 962,
 'scwx': 963,
 'sherlock': 964,
 'sle': 965,
 'differently': 966,
 'escalating': 967,
 'locky': 968,
 'meetings': 969,
 'cant': 970,
 'null': 971,
 'rtr': 972,
 'side': 973,
 'alerting': 974,
 'conversation': 975,
 'path': 976,
 'quality': 977,
 'permission': 978,
 'none': 979,
 'mp': 980,
 'notes': 981,
 'pro': 982,
 'infopath': 983,
 'utc': 984,
 'icmp': 985,
 'goods': 986,
 'sys': 987,
 'gigabitethernet': 988,
 'made': 989,
 'managing': 990,
 'generated': 991,
 'mfg': 992,
 'returned': 993,
 'shortly': 994,
 'prompt': 995,
 'affected': 996,
 'tc': 997,
 'wanted': 998,
 'operation': 999,
 'least': 1000,
 ...}
In [ ]:
num_words = len(tokenizer.word_index) + 1
print(num_words)
11849
In [ ]:
# Mounting Google Drive
from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
In [ ]:
#Extract Glove embedding zip file
# from zipfile import ZipFile
# with ZipFile('/content/drive/MyDrive/Great_Learning/NLP_sarcasm_detection/glove.6B.200d.txt', 'r') as z:
#   z.extractall()
In [ ]:
EMBEDDING_FILE = '/content/drive/MyDrive/Great_Learning/NLP_sarcasm_detection/glove.6B.200d.txt'

embeddings = {}
for o in open(EMBEDDING_FILE):
    word = o.split(" ")[0]
    #print(word)
    embd = o.split(" ")[1:]
    embd = np.asarray(embd, dtype='float32')
    #print(embd)
    embeddings[word] = embd

# create a weight matrix for words in training docs
embedding_matrix = np.zeros((num_words, 200))

for word, i in tokenizer.word_index.items():
	embedding_vector = embeddings.get(word)
	if embedding_vector is not None:
		embedding_matrix[i] = embedding_vector
In [ ]:
# Create training and test datasets with 80:20 ratio
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state = SEED, shuffle = True)
print('\033[1mShape of the training set:\033[0m', x_train.shape, y_train.shape)
print('\033[1mShape of the test set:\033[0m', x_test.shape, y_test.shape)

print(f'\nNumber of rows in training dataset: {x_train.shape[0]}')
print(f'Number of columns in training dataset: {x_train.shape[1]}')
print(f'Number of unique words in training dataset: {len(np.unique(np.hstack(x_train)))}')

print(f'\nNumber of rows in test dataset: {x_test.shape[0]}')
print(f'Number of columns in test dataset: {x_test.shape[1]}')
print(f'Number of unique words in test dataset: {len(np.unique(np.hstack(x_test)))}')
Shape of the training set: (6288, 300) (6288,)
Shape of the test set: (1572, 300) (1572,)

Number of rows in training dataset: 6288
Number of columns in training dataset: 300
Number of unique words in training dataset: 8657

Number of rows in test dataset: 1572
Number of columns in test dataset: 300
Number of unique words in test dataset: 4447
In [ ]:
# converting y data into categorical (one-hot encoding)
ytrain = to_categorical(y_train)
ytest = to_categorical(y_test)
In [ ]:
print(f'\nsample headline:\n{x_train[50]}\n\n Label of sample headline: {ytrain[50]}\n\n Label of sample headline: {y_train[50]}')
sample headline:
[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0 7467
   47   34  623   54   22 4889  865   10  760  377  236   47   34   50
 1976   54    1 7468   81  747 1001    7   64   49 1001  551   13    3
   17   44   35   48   81  107]

 Label of sample headline: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 Label of sample headline: 28
In [ ]:
num_class = len(np.unique(y_train))
num_class
Out[ ]:
48
In [ ]:
num_class_test = len(np.unique(y_test))
num_class_test
Out[ ]:
48
In [ ]:
num_class_all = len(np.unique(dataset2_DL['group'].values))
num_class_all
Out[ ]:
48
In [ ]:
input_layer = Input(shape=(maxlen,),dtype=tf.int64)
embed = Embedding(num_words,output_dim=200,input_length=maxlen,weights=[embedding_matrix], trainable=True)(input_layer)  #weights=[embedding_matrix]
lstm=Bidirectional(LSTM(128))(embed)
drop=Dropout(0.3)(lstm)
dense =Dense(100,activation='relu')(drop)
out=Dense(num_class,activation='softmax')(dense)

model_td = Model(input_layer,out)
model_td.compile(loss='categorical_crossentropy',optimizer="adam",metrics=['accuracy'])

model_td.summary()
tf.keras.utils.plot_model(model_td, show_shapes = True)
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, 300)]             0         
                                                                 
 embedding (Embedding)       (None, 300, 200)          2369800   
                                                                 
 bidirectional (Bidirectiona  (None, 256)              336896    
 l)                                                              
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense (Dense)               (None, 100)               25700     
                                                                 
 dense_1 (Dense)             (None, 48)                4848      
                                                                 
=================================================================
Total params: 2,737,244
Trainable params: 2,737,244
Non-trainable params: 0
_________________________________________________________________
Out[ ]:
In [ ]:
es = EarlyStopping(monitor='val_accuracy', mode = 'auto', verbose = 1, patience = 5)  
mc = ModelCheckpoint('model-{epoch:03d}-{val_accuracy:03f}.h5', verbose=1, monitor='val_accuracy',save_best_only=True, mode='auto')
lr_reduction = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience=2, min_lr=0.0001)
In [ ]:
batch_size = 100
epochs = 10
model_td_history = model_td.fit(x_train,ytrain,batch_size=batch_size, epochs=epochs, callbacks=[es,mc,lr_reduction], validation_data = (x_test, ytest),verbose=True)
Epoch 1/10
63/63 [==============================] - ETA: 0s - loss: 2.4370 - accuracy: 0.5003
Epoch 00001: val_accuracy improved from -inf to 0.54135, saving model to model-001-0.541349.h5
63/63 [==============================] - 10s 68ms/step - loss: 2.4370 - accuracy: 0.5003 - val_loss: 2.0312 - val_accuracy: 0.5413 - lr: 0.0010
Epoch 2/10
62/63 [============================>.] - ETA: 0s - loss: 1.8779 - accuracy: 0.5624
Epoch 00002: val_accuracy improved from 0.54135 to 0.58079, saving model to model-002-0.580789.h5
63/63 [==============================] - 4s 58ms/step - loss: 1.8789 - accuracy: 0.5625 - val_loss: 1.7864 - val_accuracy: 0.5808 - lr: 0.0010
Epoch 3/10
62/63 [============================>.] - ETA: 0s - loss: 1.6585 - accuracy: 0.5908
Epoch 00003: val_accuracy improved from 0.58079 to 0.58397, saving model to model-003-0.583969.h5
63/63 [==============================] - 4s 58ms/step - loss: 1.6592 - accuracy: 0.5899 - val_loss: 1.6954 - val_accuracy: 0.5840 - lr: 0.0010
Epoch 4/10
62/63 [============================>.] - ETA: 0s - loss: 1.4908 - accuracy: 0.6090
Epoch 00004: val_accuracy improved from 0.58397 to 0.60433, saving model to model-004-0.604326.h5
63/63 [==============================] - 4s 58ms/step - loss: 1.4918 - accuracy: 0.6093 - val_loss: 1.6178 - val_accuracy: 0.6043 - lr: 0.0010
Epoch 5/10
62/63 [============================>.] - ETA: 0s - loss: 1.3465 - accuracy: 0.6427
Epoch 00005: val_accuracy improved from 0.60433 to 0.60623, saving model to model-005-0.606234.h5
63/63 [==============================] - 4s 58ms/step - loss: 1.3466 - accuracy: 0.6431 - val_loss: 1.5658 - val_accuracy: 0.6062 - lr: 0.0010
Epoch 6/10
62/63 [============================>.] - ETA: 0s - loss: 1.1958 - accuracy: 0.6732
Epoch 00006: val_accuracy improved from 0.60623 to 0.61260, saving model to model-006-0.612595.h5
63/63 [==============================] - 4s 58ms/step - loss: 1.1954 - accuracy: 0.6737 - val_loss: 1.6019 - val_accuracy: 0.6126 - lr: 0.0010
Epoch 7/10
62/63 [============================>.] - ETA: 0s - loss: 1.0509 - accuracy: 0.7056
Epoch 00007: val_accuracy improved from 0.61260 to 0.62850, saving model to model-007-0.628499.h5
63/63 [==============================] - 4s 59ms/step - loss: 1.0485 - accuracy: 0.7067 - val_loss: 1.5524 - val_accuracy: 0.6285 - lr: 0.0010
Epoch 8/10
62/63 [============================>.] - ETA: 0s - loss: 0.9399 - accuracy: 0.7271
Epoch 00008: val_accuracy did not improve from 0.62850
63/63 [==============================] - 4s 57ms/step - loss: 0.9397 - accuracy: 0.7271 - val_loss: 1.5771 - val_accuracy: 0.6234 - lr: 0.0010
Epoch 9/10
62/63 [============================>.] - ETA: 0s - loss: 0.8081 - accuracy: 0.7603
Epoch 00009: val_accuracy did not improve from 0.62850
63/63 [==============================] - 4s 57ms/step - loss: 0.8101 - accuracy: 0.7597 - val_loss: 1.6516 - val_accuracy: 0.6253 - lr: 0.0010
Epoch 10/10
62/63 [============================>.] - ETA: 0s - loss: 0.6612 - accuracy: 0.8032
Epoch 00010: val_accuracy improved from 0.62850 to 0.63422, saving model to model-010-0.634224.h5
63/63 [==============================] - 4s 58ms/step - loss: 0.6591 - accuracy: 0.8038 - val_loss: 1.6348 - val_accuracy: 0.6342 - lr: 2.0000e-04
In [ ]:
# Evaluate the model & report accuracy
from keras.models import load_model
model_td_saved = load_model("model-010-0.634224.h5")
scores = model_td_saved.evaluate(x_test, ytest, batch_size = 100, verbose = 1)
print('Test accuracy: %.2f%%' % (scores[1]*100))
16/16 [==============================] - 1s 21ms/step - loss: 1.6348 - accuracy: 0.6342
Test accuracy: 63.42%
In [ ]:
#displaying the classification report on test/validation data
yfit = model_td_saved.predict(x_test)
test_predicted = np.argmax ( yfit, axis=-1 )
test = np.argmax ( ytest, axis=-1 )
from sklearn.metrics import classification_report
print(classification_report(test,test_predicted))
              precision    recall  f1-score   support

           0       0.82      0.88      0.85       737
           1       1.00      0.22      0.36         9
           2       0.39      0.48      0.43        46
           3       0.23      0.45      0.30        29
           4       0.55      0.50      0.52        24
           5       0.55      0.27      0.36        22
           6       0.54      0.58      0.56        26
           7       0.00      0.00      0.00         4
           8       0.63      0.83      0.72       129
           9       0.43      0.20      0.27        51
          10       0.57      0.48      0.52        25
          11       0.00      0.00      0.00         6
          12       0.49      0.59      0.54        51
          13       0.25      0.42      0.31        26
          14       0.19      0.22      0.21        18
          15       0.17      0.17      0.17         6
          16       0.33      0.38      0.35        16
          17       1.00      0.25      0.40         4
          18       0.30      0.21      0.25        14
          19       0.36      0.34      0.35        44
          20       0.00      0.00      0.00         9
          21       0.00      0.00      0.00         3
          22       0.00      0.00      0.00         7
          23       0.84      0.92      0.88        52
          24       0.26      0.57      0.35        21
          25       0.29      0.17      0.21        12
          26       0.00      0.00      0.00         3
          27       0.00      0.00      0.00         9
          28       0.11      0.04      0.06        25
          29       1.00      0.09      0.17        11
          30       0.33      0.08      0.13        12
          31       0.28      0.29      0.29        17
          32       0.14      0.07      0.10        14
          33       0.00      0.00      0.00         1
          34       0.00      0.00      0.00         3
          35       0.00      0.00      0.00         2
          36       0.40      0.14      0.21        14
          37       0.33      0.38      0.35         8
          38       1.00      0.12      0.22         8
          39       0.00      0.00      0.00         1
          40       0.00      0.00      0.00         7
          41       0.50      0.17      0.25         6
          42       0.00      0.00      0.00         3
          43       0.00      0.00      0.00         3
          44       0.00      0.00      0.00         3
          45       0.00      0.00      0.00         4
          46       0.33      0.14      0.20         7
          47       0.06      0.05      0.05        20

    accuracy                           0.63      1572
   macro avg       0.31      0.22      0.23      1572
weighted avg       0.61      0.63      0.61      1572

In [ ]:
#visualizing model performance  - loss and accuracy 
f, (ax1, ax2) = plt.subplots(1, 2, figsize = (15, 7.2))
f.suptitle('Monitoring the performance of the model')

ax1.plot(model_td_history.history['loss'], label = 'Train')
ax1.plot(model_td_history.history['val_loss'], label = 'Test')
ax1.set_title('Model Loss')
ax1.legend(['Train', 'Test'])

ax2.plot(model_td_history.history['accuracy'], label = 'Train')
ax2.plot(model_td_history.history['val_accuracy'], label = 'Test')
ax2.set_title('Model Accuracy')
ax2.legend(['Train', 'Test'])

plt.show()

12.2 DL Model with data Augmentation (synonym based)

In [ ]:
# Create training and test datasets with 80:20 ratio
X_train, X_test, y_train, y_test = train_test_split(dataset2_DL.combined_description, 
                                                    dataset2_DL.target, 
                                                    test_size=0.20, 
                                                    random_state=SEED)
print('\033[1mShape of the training set:\033[0m', X_train.shape, y_train.shape)
print('\033[1mShape of the test set:\033[0m', X_test.shape, y_test.shape)
Shape of the training set: (6288,) (6288,)
Shape of the test set: (1572,) (1572,)
In [ ]:
#Create Dataset 
y_train_df_old = pd.DataFrame(y_train,columns=['target'])
#y_train_df_old_nogrp0 = dataset1[dataset1['Assignment group'] != 'GRP_0']

descending_order = y_train_df_old['target'].value_counts().sort_values(ascending=False).index
plt.subplots(figsize=(22,5))
#add code to rotate the labels
ax=sns.countplot(x='target', data=y_train_df_old, color='royalblue',order=descending_order)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
plt.tight_layout()
plt.title('Group count before augmentation')
plt.show()
In [ ]:
#Create Dataset for 'others' i.e all groups which is not part of GRP_0
#y_train_df_old = pd. DataFrame(y_train,columns=['group'])
y_train_df_old_nogrp0 = y_train_df_old[y_train_df_old['target'] != 0]

descending_order = y_train_df_old_nogrp0['target'].value_counts().sort_values(ascending=False).index
plt.subplots(figsize=(22,5))
#add code to rotate the labels
ax=sns.countplot(x='target', data=y_train_df_old_nogrp0, color='royalblue',order=descending_order)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
plt.tight_layout()
plt.title('Group count before augmentation excluding GRP_0')
plt.show()
In [ ]:
# Take an example for augmentation
example=aug.augment(X_train[5],n=2)
print('\033[1mOriginal text:\033[0m')
print(X_train[5])
print('_'*100)
print('\033[1mAugmented text:\033[0m')
print(example[0])
print(example[1])
Original text:
unable to log in to engineering tool and skype unable to log in to engineering tool and skype
____________________________________________________________________________________________________
Augmented text:
unable to log in to engineering putz and skype unable to lumber in to engine room tool and skype
unable to log in to engineering tool and skype ineffective to log in to engineering prick and skype
In [ ]:
augmented_sentences=[]
augmented_sentences_labels=[]
for i in X_train.index:
  if y_train[i] in (24,9,12,2,19,3,6):
    temps=aug.augment(X_train[i],n=3)
    for sent in temps:
      augmented_sentences.append(sent)
      augmented_sentences_labels.append(y_train[i])
  elif y_train[i] in (13,10,5,14,24,31,18,28,4,16,47):    
    temps=aug.augment(X_train[i],n=6)
    for sent in temps:
      augmented_sentences.append(sent)
      augmented_sentences_labels.append(y_train[i])
  elif y_train[i] in (30,32,25,27,37,15,38,29,40,36,11,20,1,42,41,22):    
    temps=aug.augment(X_train[i],n=12)
    for sent in temps:
      augmented_sentences.append(sent)
      augmented_sentences_labels.append(y_train[i])
  elif y_train[i] in (46,7,35,17,26,39,34,45,43,33,21,44):    
    temps=aug.augment(X_train[i],n=24)
    for sent in temps:
      augmented_sentences.append(sent)
      augmented_sentences_labels.append(y_train[i])
In [ ]:
X_train=X_train.append(pd.Series(augmented_sentences),ignore_index=True)
y_train=y_train.append(pd.Series(augmented_sentences_labels),ignore_index=True)

print(X_train.shape)
print(y_train.shape)
(24621,)
(24621,)
In [ ]:
y_train_df = pd. DataFrame(y_train,columns=['target'])
In [ ]:
y_train_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24621 entries, 0 to 24620
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   target  24621 non-null  int64
dtypes: int64(1)
memory usage: 192.5 KB
In [ ]:
#Create Dataset 
#y_train_df_old_nogrp0 = dataset1[dataset1['Assignment group'] != 'GRP_0']

descending_order = y_train_df['target'].value_counts().sort_values(ascending=False).index
plt.subplots(figsize=(22,5))
#add code to rotate the labels
ax=sns.countplot(x='target', data=y_train_df, color='royalblue',order=descending_order)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
plt.tight_layout()
plt.title('Group count after augmentation')
plt.show()
In [ ]:
#Create Dataset for 'others' i.e all groups which is not part of GRP_0
#y_train_df_old = pd. DataFrame(y_train,columns=['group'])
y_train_df_nogrp0 = y_train_df[y_train_df['target'] != 0]

descending_order = y_train_df_nogrp0['target'].value_counts().sort_values(ascending=False).index
plt.subplots(figsize=(22,5))
#add code to rotate the labels
ax=sns.countplot(x='target', data=y_train_df_nogrp0, color='royalblue',order=descending_order)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
plt.tight_layout()
plt.title('Group count after augmentation excluding GRP_0')
plt.show()
In [ ]:
y_train_df.groupby(['target']).size()
Out[ ]:
target
0     2923
1      286
2      652
3      672
4      497
5      742
6      624
7      425
8      462
9      804
10     798
11     312
12     800
13     819
14     686
15     390
16     476
17     375
18     518
19     676
20     312
21     250
22     260
23     233
24     376
25     559
26     350
27     455
28     504
29     364
30     715
31     602
32     624
33     250
34     300
35     425
36     338
37     416
38     377
39     350
40     364
41     273
42     286
43     275
44     200
45     300
46     450
47     476
dtype: int64
In [ ]:
max_features = 10000
maxlen = 300
embedding_size = 200
In [ ]:
tokenizer = Tokenizer(num_words = max_features)
tokenizer.fit_on_texts(X_train)
x_train = tokenizer.texts_to_sequences(X_train)
In [ ]:
x_test = tokenizer.texts_to_sequences(X_test)
In [ ]:
x_train = pad_sequences(x_train, padding='pre', maxlen = maxlen)
x_test = pad_sequences(x_test, padding='pre', maxlen = maxlen)
#Y = np.asarray(dataset2_DL['group'])
In [ ]:
#print sample headline and lable
print(f'\nsample headline:\n{x_train[0]}\n\n Label of sample headline: {y_train[0]}')
sample headline:
[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0  235  671   34 1371   63  259
    3   63   32   27  430    7  671   34   10    1  235   44    6   14
   67  353  116  506  177   87]

 Label of sample headline: 37
In [ ]:
tokenizer.word_index
Out[ ]:
{'the': 1,
 'to': 2,
 'in': 3,
 'is': 4,
 'job': 5,
 'and': 6,
 'a': 7,
 'for': 8,
 'not': 9,
 'on': 10,
 'at': 11,
 'of': 12,
 'please': 13,
 'i': 14,
 'this': 15,
 'scheduler': 16,
 'it': 17,
 'erp': 18,
 "'": 19,
 'hostname': 20,
 'be': 21,
 'no': 22,
 'from': 23,
 'with': 24,
 'failed': 25,
 'you': 26,
 'are': 27,
 'tool': 28,
 'company': 29,
 'e': 30,
 'can': 31,
 'we': 32,
 'access': 33,
 'error': 34,
 'that': 35,
 'password': 36,
 'sid': 37,
 'user': 38,
 'have': 39,
 'as': 40,
 'c': 41,
 'issue': 42,
 'd': 43,
 'system': 44,
 'by': 45,
 'my': 46,
 'unable': 47,
 'id': 48,
 'ticket': 49,
 'f': 50,
 'when': 51,
 'help': 52,
 'plant': 53,
 'person': 54,
 'or': 55,
 'need': 56,
 'but': 57,
 'event': 58,
 'has': 59,
 'reset': 60,
 'account': 61,
 'working': 62,
 'usa': 63,
 'email': 64,
 's': 65,
 'if': 66,
 'cannot': 67,
 'an': 68,
 'server': 69,
 'see': 70,
 'name': 71,
 'up': 72,
 'order': 73,
 'message': 74,
 'am': 75,
 'information': 76,
 't': 77,
 'data': 78,
 'crm': 79,
 'below': 80,
 'was': 81,
 'all': 82,
 'ip': 83,
 'out': 84,
 'x': 85,
 'na': 86,
 'number': 87,
 'work': 88,
 'printer': 89,
 'new': 90,
 'mail': 91,
 'do': 92,
 'customer': 93,
 'team': 94,
 'space': 95,
 'down': 96,
 'me': 97,
 'outlook': 98,
 'check': 99,
 'yes': 100,
 'computer': 101,
 'device': 102,
 'network': 103,
 'phone': 104,
 'get': 105,
 'problem': 106,
 'does': 107,
 'login': 108,
 'will': 109,
 'time': 110,
 'delivery': 111,
 'sales': 112,
 'inside': 113,
 'pm': 114,
 'tcp': 115,
 'log': 116,
 'pc': 117,
 'hi': 118,
 'change': 119,
 'b': 120,
 'your': 121,
 'non': 122,
 'any': 123,
 'file': 124,
 'connection': 125,
 'com': 126,
 'there': 127,
 'attached': 128,
 'been': 129,
 'us': 130,
 'since': 131,
 'able': 132,
 'available': 133,
 'engineering': 134,
 'open': 135,
 'production': 136,
 'create': 137,
 'group': 138,
 'which': 139,
 'inwarehouse': 140,
 'update': 141,
 'request': 142,
 'vpn': 143,
 'only': 144,
 'after': 145,
 'address': 146,
 'best': 147,
 'screen': 148,
 'via': 149,
 'our': 150,
 'also': 151,
 'over': 152,
 'using': 153,
 'report': 154,
 'business': 155,
 'source': 156,
 'what': 157,
 'line': 158,
 'vendor': 159,
 'may': 160,
 'manager': 161,
 'code': 162,
 'other': 163,
 'call': 164,
 'skype': 165,
 'platform': 166,
 'asa': 167,
 'et': 168,
 'following': 169,
 'print': 170,
 'could': 171,
 'same': 172,
 'should': 173,
 'application': 174,
 'these': 175,
 'windows': 176,
 'contact': 177,
 'g': 178,
 'one': 179,
 'set': 180,
 'status': 181,
 'service': 182,
 'germany': 183,
 'host': 184,
 'laptop': 185,
 'type': 186,
 'connect': 187,
 'collaboration': 188,
 'some': 189,
 'so': 190,
 'label': 191,
 'site': 192,
 'uacyltoe': 193,
 'management': 194,
 'use': 195,
 'would': 196,
 'make': 197,
 'mm': 198,
 'port': 199,
 'folder': 200,
 'priority': 201,
 'power': 202,
 'summary': 203,
 'note': 204,
 '–': 205,
 'machine': 206,
 'src': 207,
 'dst': 208,
 'hr': 209,
 'blocked': 210,
 'he': 211,
 'users': 212,
 're': 213,
 'm': 214,
 'software': 215,
 'like': 216,
 'cost': 217,
 'issues': 218,
 'consumed': 219,
 'material': 220,
 'sep': 221,
 'possible': 222,
 'internal': 223,
 'into': 224,
 'now': 225,
 'list': 226,
 'emails': 227,
 'reporting': 228,
 'office': 229,
 'locked': 230,
 'http': 231,
 'getting': 232,
 'employee': 233,
 'how': 234,
 'mii': 235,
 'interface': 236,
 'back': 237,
 'they': 238,
 'volume': 239,
 'add': 240,
 'destination': 241,
 'alerts': 242,
 'internet': 243,
 'still': 244,
 'domain': 245,
 'dear': 246,
 'fail': 247,
 'deny': 248,
 'portal': 249,
 'acl': 250,
 'received': 251,
 'hxgaycze': 252,
 'created': 253,
 'client': 254,
 'process': 255,
 'co': 256,
 'details': 257,
 'microsoft': 258,
 'location': 259,
 'notification': 260,
 'files': 261,
 'high': 262,
 'know': 263,
 'sent': 264,
 'app': 265,
 'due': 266,
 'ms': 267,
 'ap': 268,
 'printing': 269,
 'being': 270,
 'needs': 271,
 'more': 272,
 'telephone': 273,
 'exe': 274,
 'problems': 275,
 'send': 276,
 'per': 277,
 'center': 278,
 'item': 279,
 'pcap': 280,
 'urgent': 281,
 'eu': 282,
 'drive': 283,
 'security': 284,
 'events': 285,
 'count': 286,
 'running': 287,
 'outside': 288,
 'passwords': 289,
 'u': 290,
 'action': 291,
 'r': 292,
 'used': 293,
 'page': 294,
 'aug': 295,
 'traffic': 296,
 'let': 297,
 'slow': 298,
 'technology': 299,
 'showing': 300,
 'agent': 301,
 'nwfodmhc': 302,
 'programdnty': 303,
 'link': 304,
 'why': 305,
 'exurcwkm': 306,
 'found': 307,
 'find': 308,
 'look': 309,
 'warning': 310,
 'orders': 311,
 'switch': 312,
 'enter': 313,
 'required': 314,
 'communication': 315,
 'go': 316,
 'just': 317,
 'view': 318,
 'n': 319,
 'attachment': 320,
 'did': 321,
 'document': 322,
 'content': 323,
 'disk': 324,
 'august': 325,
 'product': 326,
 'backup': 327,
 'post': 328,
 'september': 329,
 'ad': 330,
 'missing': 331,
 'run': 332,
 'unlock': 333,
 'da': 334,
 'language': 335,
 'remote': 336,
 'related': 337,
 'full': 338,
 'sinkhole': 339,
 'provide': 340,
 'then': 341,
 'again': 342,
 'trying': 343,
 'display': 344,
 'task': 345,
 'inc': 346,
 'his': 347,
 'pls': 348,
 'multiple': 349,
 'kindly': 350,
 'than': 351,
 'wrong': 352,
 'even': 353,
 'outage': 354,
 'today': 355,
 'expense': 356,
 'without': 357,
 'start': 358,
 'while': 359,
 'explorer': 360,
 'po': 361,
 'o': 362,
 'correct': 363,
 'give': 364,
 'purchasing': 365,
 'fix': 366,
 'show': 367,
 'java': 368,
 'sql': 369,
 'plm': 370,
 'delete': 371,
 'support': 372,
 'circuit': 373,
 'install': 374,
 'payroll': 375,
 'browser': 376,
 'date': 377,
 'different': 378,
 'area': 379,
 'october': 380,
 'dsw': 381,
 'advise': 382,
 'maintenance': 383,
 'under': 384,
 'about': 385,
 'locky': 386,
 'sincerely': 387,
 'screenshot': 388,
 'had': 389,
 'try': 390,
 'hana': 391,
 'active': 392,
 'receive': 393,
 'through': 394,
 'field': 395,
 'incident': 396,
 'resolve': 397,
 'having': 398,
 'vid': 399,
 'cold': 400,
 'very': 401,
 'them': 402,
 'admin': 403,
 'complete': 404,
 'default': 405,
 'meeting': 406,
 'total': 407,
 'shows': 408,
 'her': 409,
 'dn': 410,
 'global': 411,
 'got': 412,
 'connected': 413,
 'before': 414,
 'telecom': 415,
 'apac': 416,
 'book': 417,
 'detail': 418,
 'automatically': 419,
 'calls': 420,
 'version': 421,
 'where': 422,
 'finance': 423,
 'were': 424,
 'incidents': 425,
 'fe': 426,
 'web': 427,
 'warehouse': 428,
 'assigned': 429,
 'receiving': 430,
 'tried': 431,
 'excel': 432,
 'p': 433,
 'day': 434,
 'ex': 435,
 'last': 436,
 'ascii': 437,
 'hex': 438,
 'h': 439,
 'changed': 440,
 'review': 441,
 'ship': 442,
 'external': 443,
 'already': 444,
 'activity': 445,
 'local': 446,
 'times': 447,
 'sto': 448,
 'price': 449,
 'block': 450,
 'sw': 451,
 'response': 452,
 'good': 453,
 'both': 454,
 'handling': 455,
 'updated': 456,
 'assignments': 457,
 'database': 458,
 'caper': 459,
 'changes': 460,
 'she': 461,
 'chore': 462,
 'agents': 463,
 'model': 464,
 'sys': 465,
 'th': 466,
 'items': 467,
 'vip': 468,
 'processing': 469,
 'outbound': 470,
 'rule': 471,
 'profit': 472,
 'during': 473,
 'fine': 474,
 'because': 475,
 'pdf': 476,
 'certificate': 477,
 'monitoring': 478,
 'tools': 479,
 'evening': 480,
 'hxgayczeing': 481,
 'control': 482,
 'monitor': 483,
 'search': 484,
 'occupation': 485,
 'stock': 486,
 'ab': 487,
 'personal': 488,
 'two': 489,
 'dev': 490,
 'save': 491,
 'documents': 492,
 'inspector': 493,
 'transfer': 494,
 'blank': 495,
 'transaction': 496,
 'longer': 497,
 'example': 498,
 'quote': 499,
 'doesn': 500,
 'desktop': 501,
 'needed': 502,
 'tax': 503,
 'desk': 504,
 'evened': 505,
 'off': 506,
 'options': 507,
 'confirmed': 508,
 'card': 509,
 'download': 510,
 'copy': 511,
 'explicit': 512,
 'dashbankrd': 513,
 'impact': 514,
 'bank': 515,
 'drawing': 516,
 'too': 517,
 'another': 518,
 'de': 519,
 'right': 520,
 'completed': 521,
 'method': 522,
 'defective': 523,
 'morning': 524,
 'numbers': 525,
 'friday': 526,
 'their': 527,
 'investigate': 528,
 'accounts': 529,
 'w': 530,
 'well': 531,
 'mobile': 532,
 'function': 533,
 'shop': 534,
 'part': 535,
 'form': 536,
 'home': 537,
 'floor': 538,
 'infected': 539,
 'someone': 540,
 'printed': 541,
 'approved': 542,
 'many': 543,
 'servers': 544,
 'pgi': 545,
 'incorrect': 546,
 'packet': 547,
 'interaction': 548,
 'south': 549,
 'take': 550,
 'aerp': 551,
 'net': 552,
 'hotf': 553,
 'online': 554,
 'click': 555,
 'return': 556,
 'k': 557,
 'want': 558,
 'each': 559,
 'services': 560,
 'etc': 561,
 'magento': 562,
 'atomic': 563,
 'org': 564,
 'further': 565,
 'applications': 566,
 'versions': 567,
 'refer': 568,
 'project': 569,
 'malware': 570,
 'dns': 571,
 'reason': 572,
 'handle': 573,
 '…': 574,
 'seems': 575,
 'properly': 576,
 'come': 577,
 'write': 578,
 'between': 579,
 'gso': 580,
 'none': 581,
 'bk': 582,
 'em': 583,
 'end': 584,
 'escalation': 585,
 'located': 586,
 'month': 587,
 'delight': 588,
 'who': 589,
 'fw': 590,
 'hello': 591,
 'future': 592,
 'added': 593,
 'node': 594,
 'reports': 595,
 'old': 596,
 'dell': 597,
 'occurrence': 598,
 'select': 599,
 'threshold': 600,
 'questions': 601,
 'several': 602,
 'scheduled': 603,
 'wifi': 604,
 'room': 605,
 'qty': 606,
 'checked': 607,
 'forward': 608,
 'once': 609,
 'exist': 610,
 'workflow': 611,
 'j': 612,
 'calling': 613,
 'sending': 614,
 'www': 615,
 'partner': 616,
 'failure': 617,
 'jobs': 618,
 'word': 619,
 'either': 620,
 'ie': 621,
 'state': 622,
 'supply': 623,
 'isensor': 624,
 'protocol': 625,
 'virus': 626,
 'keybankrd': 627,
 'its': 628,
 'few': 629,
 'injection': 630,
 'engine': 631,
 'symantec': 632,
 'current': 633,
 'soc': 634,
 'inbound': 635,
 'pieces': 636,
 'alert': 637,
 'above': 638,
 'capacity': 639,
 'output': 640,
 'scan': 641,
 'assign': 642,
 'departments': 643,
 'billing': 644,
 'india': 645,
 'ce': 646,
 'point': 647,
 'purposes': 648,
 'centers': 649,
 'dc': 650,
 'corresponding': 651,
 'datacenter': 652,
 'currently': 653,
 'input': 654,
 'started': 655,
 'operation': 656,
 'seeing': 657,
 'settings': 658,
 'systems': 659,
 'coming': 660,
 'null': 661,
 'customers': 662,
 'next': 663,
 'however': 664,
 'something': 665,
 'allow': 666,
 'additional': 667,
 'udp': 668,
 'path': 669,
 'ticketing': 670,
 'technical': 671,
 'sure': 672,
 'button': 673,
 'v': 674,
 'pcs': 675,
 'read': 676,
 'mistake': 677,
 'days': 678,
 'director': 679,
 'here': 680,
 'cert': 681,
 'thank': 682,
 'submit': 683,
 'boot': 684,
 'single': 685,
 'attachments': 686,
 'sartlgeo': 687,
 'installation': 688,
 'win': 689,
 'sync': 690,
 'generating': 691,
 'every': 692,
 'shot': 693,
 'opening': 694,
 'notified': 695,
 'audio': 696,
 'lhqksbdx': 697,
 'follow': 698,
 'operator': 699,
 'cc': 700,
 'distribution': 701,
 'instead': 702,
 'step': 703,
 'blue': 704,
 'pl': 705,
 'approval': 706,
 'delegating': 707,
 'calendar': 708,
 'box': 709,
 'msd': 710,
 'within': 711,
 'remove': 712,
 'generated': 713,
 'always': 714,
 'requested': 715,
 'put': 716,
 'kind': 717,
 'anymore': 718,
 'tuesday': 719,
 'correctly': 720,
 'shown': 721,
 'confirmation': 722,
 'fd': 723,
 'amount': 724,
 'organization': 725,
 'profile': 726,
 'daily': 727,
 'medium': 728,
 'score': 729,
 'free': 730,
 'ft': 731,
 'directory': 732,
 'krcscfpry': 733,
 'planned': 734,
 'caller': 735,
 'utc': 736,
 'renew': 737,
 'bobj': 738,
 'teams': 739,
 'image': 740,
 'recipient': 741,
 'against': 742,
 'near': 743,
 'confirm': 744,
 'yesterday': 745,
 'opened': 746,
 'employees': 747,
 'cvss': 748,
 'z': 749,
 'way': 750,
 'monday': 751,
 'prod': 752,
 'classification': 753,
 'description': 754,
 'indicate': 755,
 'setup': 756,
 'wireless': 757,
 'fax': 758,
 'maint': 759,
 'ctoc': 760,
 'amerirtca': 761,
 'installed': 762,
 'provider': 763,
 'works': 764,
 'wednesday': 765,
 'load': 766,
 'yet': 767,
 'empty': 768,
 'scwx': 769,
 'sender': 770,
 'reference': 771,
 'replace': 772,
 'escalating': 773,
 'differently': 774,
 'df': 775,
 'directionality': 776,
 'sherlock': 777,
 'dat': 778,
 'spam': 779,
 'gb': 780,
 'inplant': 781,
 'concerns': 782,
 'primary': 783,
 'sle': 784,
 'shipping': 785,
 'hub': 786,
 'messages': 787,
 'such': 788,
 'sms': 789,
 'live': 790,
 'turn': 791,
 'text': 792,
 'media': 793,
 'entered': 794,
 'gsc': 795,
 'route': 796,
 '\u200e': 797,
 'release': 798,
 'going': 799,
 'username': 800,
 'directly': 801,
 'place': 802,
 'iphone': 803,
 'public': 804,
 'lpawxsf': 805,
 'solve': 806,
 'mage': 807,
 'adminhtml': 808,
 'widget': 809,
 'exists': 810,
 'made': 811,
 'dial': 812,
 'android': 813,
 'vulnerability': 814,
 'hours': 815,
 'extended': 816,
 'administrator': 817,
 'keep': 818,
 'logged': 819,
 'controller': 820,
 'setting': 821,
 'resolved': 822,
 'minutes': 823,
 'errors': 824,
 'done': 825,
 'top': 826,
 'most': 827,
 'don': 828,
 'him': 829,
 'accept': 830,
 'mb': 831,
 'drawings': 832,
 'ineffective': 833,
 'readjust': 834,
 'financial': 835,
 'bw': 836,
 'appears': 837,
 'solution': 838,
 'ef': 839,
 'ff': 840,
 'logging': 841,
 'subject': 842,
 'least': 843,
 'deleted': 844,
 'german': 845,
 'condition': 846,
 'immediately': 847,
 'direction': 848,
 'url': 849,
 'clear': 850,
 'ineffectual': 851,
 'infection': 852,
 'enable': 853,
 'master': 854,
 'jul': 855,
 'doc': 856,
 'mouse': 857,
 'sir': 858,
 'approve': 859,
 'intended': 860,
 'hp': 861,
 'repeat': 862,
 'rma': 863,
 'chain': 864,
 'attempt': 865,
 'routing': 866,
 'ee': 867,
 'close': 868,
 'batch': 869,
 'manually': 870,
 'affected': 871,
 'facing': 872,
 'mails': 873,
 'latitude': 874,
 'assist': 875,
 'repair': 876,
 'pricing': 877,
 'ignore': 878,
 'shared': 879,
 'eemw': 880,
 'asset': 881,
 'called': 882,
 'case': 883,
 'supervisor': 884,
 'assistance': 885,
 'quantity': 886,
 'asking': 887,
 'value': 888,
 'gr': 889,
 'long': 890,
 'happened': 891,
 'duration': 892,
 'object': 893,
 "can't": 894,
 'tos': 895,
 'cancel': 896,
 'displayed': 897,
 'shipment': 898,
 'ess': 899,
 'abended': 900,
 'attach': 901,
 'procedure': 902,
 'fy': 903,
 'ping': 904,
 'license': 905,
 'firewall': 906,
 'thursday': 907,
 'battery': 908,
 'failagain': 909,
 'ransomware': 910,
 'picture': 911,
 'query': 912,
 'performance': 913,
 'paper': 914,
 'notes': 915,
 'until': 916,
 'owner': 917,
 'sign': 918,
 'connecting': 919,
 'vlan': 920,
 'first': 921,
 'ecc': 922,
 'substance': 923,
 'generate': 924,
 'key': 925,
 'comes': 926,
 'verified': 927,
 'soon': 928,
 'info': 929,
 'correlation': 930,
 'reported': 931,
 'bad': 932,
 'ac': 933,
 'csd': 934,
 'sound': 935,
 'lead': 936,
 'overview': 937,
 'week': 938,
 'names': 939,
 'af': 940,
 'record': 941,
 'geolocation': 942,
 'mr': 943,
 'nothing': 944,
 'lean': 945,
 'responsible': 946,
 'zsd': 947,
 'pick': 948,
 'instances': 949,
 'managing': 950,
 'driver': 951,
 'auto': 952,
 'level': 953,
 'lease': 954,
 'including': 955,
 'assignment': 956,
 'sensor': 957,
 'lhqsm': 958,
 'upgrade': 959,
 'reboot': 960,
 'extend': 961,
 'dhcpd': 962,
 'dhcpack': 963,
 'eth': 964,
 'relay': 965,
 'examples': 966,
 'mp': 967,
 'determine': 968,
 'php': 969,
 'pro': 970,
 'requests': 971,
 'average': 972,
 'confidential': 973,
 'complaint': 974,
 'transactions': 975,
 'website': 976,
 'says': 977,
 'devices': 978,
 'payment': 979,
 'rate': 980,
 'creating': 981,
 'samples': 982,
 'procedures': 983,
 'addresses': 984,
 'critical': 985,
 'netbios': 986,
 'noris': 987,
 'ask': 988,
 'hard': 989,
 'expired': 990,
 'mailbox': 991,
 'detected': 992,
 'occurred': 993,
 'extra': 994,
 'fu': 995,
 'window': 996,
 'necessary': 997,
 '·': 998,
 'alwaysupservice': 999,
 'responding': 1000,
 ...}
In [ ]:
num_words = len(tokenizer.word_index) + 1
print(num_words)
13619
In [ ]:
# Mounting Google Drive
#from google.colab import drive
#drive.mount('/content/drive')
In [ ]:
#Extract Glove embedding zip file
#from zipfile import ZipFile
#with ZipFile('drive/My Drive/datasets/NLP/sarcasm_detection/Glove.6B.200d.zip', 'r') as z:
  #z.extractall()
In [ ]:
EMBEDDING_FILE = '/content/drive/MyDrive/Great_Learning/NLP_sarcasm_detection/glove.6B.200d.txt'

embeddings = {}
for o in open(EMBEDDING_FILE):
   # word = o.split(" ")[0]
    # print(word)
    embd = o.split(" ")[1:]
    embd = np.asarray(embd, dtype='float32')
    # print(embd)
    embeddings[word] = embd

# create a weight matrix for words in training docs
embedding_matrix = np.zeros((num_words, 200))

for word, i in tokenizer.word_index.items():
	embedding_vector = embeddings.get(word)
	if embedding_vector is not None:
		embedding_matrix[i] = embedding_vector
In [ ]:
#splitting dataset into train and test datasets
#x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state = SEED, shuffle = True)

print(f'\nNumber of rows in training dataset: {x_train.shape[0]}')
print(f'Number of columns in training dataset: {x_train.shape[1]}')
print(f'Number of unique words in training dataset: {len(np.unique(np.hstack(x_train)))}')

print(f'\nNumber of rows in test dataset: {x_test.shape[0]}')
print(f'Number of columns in test dataset: {x_test.shape[1]}')
print(f'Number of unique words in test dataset: {len(np.unique(np.hstack(x_test)))}')
Number of rows in training dataset: 24621
Number of columns in training dataset: 300
Number of unique words in training dataset: 9791

Number of rows in test dataset: 1572
Number of columns in test dataset: 300
Number of unique words in test dataset: 3273
In [ ]:
print(f'\nsample headline:\n{x_train[5]}\n\n Label of sample headline: {y_train[5]}')
sample headline:
[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0 36 60 36 60]

 Label of sample headline: 0
In [ ]:
num_class = len(np.unique(y_train.values))
num_class
Out[ ]:
48
In [ ]:
num_class_test = len(np.unique(y_test.values))
num_class_test
Out[ ]:
48
In [ ]:
num_class_all = len(np.unique(dataset2_DL['group'].values))
num_class_all
Out[ ]:
48
In [ ]:
# converting y data into categorical (one-hot encoding)
ytrain = to_categorical(y_train)
ytest = to_categorical(y_test)
In [ ]:
print(f'\nsample headline:\n{x_train[50]}\n\n Label of sample headline:\n{ytrain[50]}\n\n Label of sample headline:\n{y_train[50]}')
sample headline:
[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0 4779
   64   23  611   44   17 3199 1232   14  714  412  189   64   23   62
 1373   44    2 6091   97  859 1085    6   57   35 1085  444   22    3
   18   31   26   52   97   99]

 Label of sample headline:
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 Label of sample headline:
28
In [ ]:
ytest.shape[0],ytest.shape[1]
Out[ ]:
(1572, 48)
In [ ]:
input_layer = Input(shape=(maxlen,),dtype=tf.int64)
embed = Embedding(num_words,output_dim=200,input_length=maxlen,weights=[embedding_matrix], trainable=True)(input_layer)  #weights=[embedding_matrix]
lstm=Bidirectional(LSTM(128))(embed)
drop=Dropout(0.3)(lstm)
dense =Dense(100,activation='relu')(drop)
out=Dense(num_class,activation='softmax')(dense)

model_td = Model(input_layer,out)
model_td.compile(loss='categorical_crossentropy',optimizer="adam",metrics=['accuracy'])

model_td.summary()
tf.keras.utils.plot_model(model_td, show_shapes = True)
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_2 (InputLayer)        [(None, 300)]             0         
                                                                 
 embedding_1 (Embedding)     (None, 300, 200)          2723800   
                                                                 
 bidirectional_1 (Bidirectio  (None, 256)              336896    
 nal)                                                            
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                                 
 dense_2 (Dense)             (None, 100)               25700     
                                                                 
 dense_3 (Dense)             (None, 48)                4848      
                                                                 
=================================================================
Total params: 3,091,244
Trainable params: 3,091,244
Non-trainable params: 0
_________________________________________________________________
Out[ ]:
In [ ]:
es = EarlyStopping(monitor='val_accuracy', mode = 'auto', verbose = 1, patience = 5)  
mc = ModelCheckpoint('model-{epoch:03d}-{val_accuracy:03f}.h5', verbose=1, monitor='val_accuracy',save_best_only=True, mode='auto')
lr_reduction = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience=2, min_lr=0.0001)
In [ ]:
batch_size = 100
epochs = 10
model_td_history = model_td.fit(x_train,ytrain,batch_size=batch_size, epochs=epochs, callbacks=[es,mc,lr_reduction], validation_data = (x_test, ytest),verbose=True)
Epoch 1/10
246/247 [============================>.] - ETA: 0s - loss: 2.8070 - accuracy: 0.2579
Epoch 00001: val_accuracy improved from -inf to 0.50445, saving model to model-001-0.504453.h5
247/247 [==============================] - 16s 56ms/step - loss: 2.8061 - accuracy: 0.2581 - val_loss: 2.0431 - val_accuracy: 0.5045 - lr: 0.0010
Epoch 2/10
247/247 [==============================] - ETA: 0s - loss: 1.0065 - accuracy: 0.7027
Epoch 00002: val_accuracy improved from 0.50445 to 0.55216, saving model to model-002-0.552163.h5
247/247 [==============================] - 13s 53ms/step - loss: 1.0065 - accuracy: 0.7027 - val_loss: 2.2042 - val_accuracy: 0.5522 - lr: 0.0010
Epoch 3/10
246/247 [============================>.] - ETA: 0s - loss: 0.4488 - accuracy: 0.8731
Epoch 00003: val_accuracy improved from 0.55216 to 0.57824, saving model to model-003-0.578244.h5
247/247 [==============================] - 13s 53ms/step - loss: 0.4485 - accuracy: 0.8732 - val_loss: 2.3411 - val_accuracy: 0.5782 - lr: 0.0010
Epoch 4/10
246/247 [============================>.] - ETA: 0s - loss: 0.2654 - accuracy: 0.9234
Epoch 00004: val_accuracy improved from 0.57824 to 0.59733, saving model to model-004-0.597328.h5
247/247 [==============================] - 13s 53ms/step - loss: 0.2652 - accuracy: 0.9234 - val_loss: 2.4602 - val_accuracy: 0.5973 - lr: 2.0000e-04
Epoch 5/10
246/247 [============================>.] - ETA: 0s - loss: 0.2298 - accuracy: 0.9330
Epoch 00005: val_accuracy did not improve from 0.59733
247/247 [==============================] - 13s 53ms/step - loss: 0.2298 - accuracy: 0.9331 - val_loss: 2.5858 - val_accuracy: 0.5865 - lr: 2.0000e-04
Epoch 6/10
246/247 [============================>.] - ETA: 0s - loss: 0.2097 - accuracy: 0.9383
Epoch 00006: val_accuracy did not improve from 0.59733
247/247 [==============================] - 13s 53ms/step - loss: 0.2097 - accuracy: 0.9382 - val_loss: 2.6226 - val_accuracy: 0.5916 - lr: 1.0000e-04
Epoch 7/10
246/247 [============================>.] - ETA: 0s - loss: 0.1994 - accuracy: 0.9398
Epoch 00007: val_accuracy improved from 0.59733 to 0.61450, saving model to model-007-0.614504.h5
247/247 [==============================] - 13s 53ms/step - loss: 0.1993 - accuracy: 0.9398 - val_loss: 2.6789 - val_accuracy: 0.6145 - lr: 1.0000e-04
Epoch 8/10
246/247 [============================>.] - ETA: 0s - loss: 0.1940 - accuracy: 0.9407
Epoch 00008: val_accuracy did not improve from 0.61450
247/247 [==============================] - 13s 53ms/step - loss: 0.1939 - accuracy: 0.9407 - val_loss: 2.6845 - val_accuracy: 0.6088 - lr: 1.0000e-04
Epoch 9/10
246/247 [============================>.] - ETA: 0s - loss: 0.1865 - accuracy: 0.9423
Epoch 00009: val_accuracy improved from 0.61450 to 0.61768, saving model to model-009-0.617684.h5
247/247 [==============================] - 13s 54ms/step - loss: 0.1866 - accuracy: 0.9422 - val_loss: 2.7936 - val_accuracy: 0.6177 - lr: 1.0000e-04
Epoch 10/10
246/247 [============================>.] - ETA: 0s - loss: 0.1807 - accuracy: 0.9440
Epoch 00010: val_accuracy did not improve from 0.61768
247/247 [==============================] - 13s 53ms/step - loss: 0.1807 - accuracy: 0.9440 - val_loss: 2.8092 - val_accuracy: 0.6177 - lr: 1.0000e-04
In [ ]:
# Evaluate the model & report accuracy
from keras.models import load_model
model_td_saved = load_model("model-009-0.617684.h5")
scores = model_td_saved.evaluate(x_test, ytest, batch_size = 100, verbose = 1)
print('Test accuracy: %.2f%%' % (scores[1]*100))
16/16 [==============================] - 1s 21ms/step - loss: 2.7936 - accuracy: 0.6177
Test accuracy: 61.77%
In [ ]:
#displaying the classification report on test/validation data
yfit = model_td_saved.predict(x_test)
test_predicted = np.argmax ( yfit, axis=-1 )
test = np.argmax ( ytest, axis=-1 )
from sklearn.metrics import classification_report
print(classification_report(test,test_predicted))
              precision    recall  f1-score   support

           0       0.78      0.85      0.82       737
           1       0.23      0.33      0.27         9
           2       0.38      0.24      0.29        46
           3       0.28      0.31      0.30        29
           4       0.48      0.42      0.44        24
           5       0.50      0.36      0.42        22
           6       0.54      0.58      0.56        26
           7       0.00      0.00      0.00         4
           8       0.64      0.76      0.70       129
           9       0.39      0.24      0.29        51
          10       0.65      0.44      0.52        25
          11       0.20      0.17      0.18         6
          12       0.59      0.47      0.52        51
          13       0.58      0.54      0.56        26
          14       0.41      0.39      0.40        18
          15       0.25      0.17      0.20         6
          16       0.25      0.12      0.17        16
          17       0.44      1.00      0.62         4
          18       0.46      0.43      0.44        14
          19       0.27      0.30      0.28        44
          20       0.00      0.00      0.00         9
          21       0.00      0.00      0.00         3
          22       0.38      0.71      0.50         7
          23       0.85      0.87      0.86        52
          24       0.26      0.24      0.25        21
          25       0.08      0.08      0.08        12
          26       0.00      0.00      0.00         3
          27       0.33      0.11      0.17         9
          28       0.53      0.40      0.45        25
          29       0.60      0.27      0.37        11
          30       0.30      0.25      0.27        12
          31       0.32      0.35      0.33        17
          32       0.31      0.36      0.33        14
          33       0.00      0.00      0.00         1
          34       0.00      0.00      0.00         3
          35       0.00      0.00      0.00         2
          36       0.50      0.07      0.12        14
          37       0.25      0.25      0.25         8
          38       0.43      0.38      0.40         8
          39       0.00      0.00      0.00         1
          40       0.00      0.00      0.00         7
          41       0.17      0.33      0.22         6
          42       0.00      0.00      0.00         3
          43       0.00      0.00      0.00         3
          44       0.00      0.00      0.00         3
          45       0.00      0.00      0.00         4
          46       0.00      0.00      0.00         7
          47       0.06      0.05      0.05        20

    accuracy                           0.62      1572
   macro avg       0.29      0.27      0.26      1572
weighted avg       0.60      0.62      0.60      1572

In [ ]:
#visualizing model performance  - loss and accuracy 
f, (ax1, ax2) = plt.subplots(1, 2, figsize = (15, 7.2))
f.suptitle('Monitoring the performance of the model')

ax1.plot(model_td_history.history['loss'], label = 'Train')
ax1.plot(model_td_history.history['val_loss'], label = 'Test')
ax1.set_title('Model Loss')
ax1.legend(['Train', 'Test'])

ax2.plot(model_td_history.history['accuracy'], label = 'Train')
ax2.plot(model_td_history.history['val_accuracy'], label = 'Test')
ax2.set_title('Model Accuracy')
ax2.legend(['Train', 'Test'])

plt.show()

12.3 DL model with Upsampling/resampling technique (to treat class imabalance)

In [ ]:
dataset2_DL_US=dataset2_DL.copy()
In [ ]:
dataset2_DL_US.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7860 entries, 0 to 7859
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Short description     7860 non-null   object 
 1   Description           7860 non-null   object 
 2   Caller                7860 non-null   object 
 3   Assignment group      7860 non-null   object 
 4   New Assignment Group  7860 non-null   object 
 5   combined_description  7860 non-null   object 
 6   pred_group            0 non-null      float64
 7   Language              7860 non-null   object 
 8   group                 7860 non-null   int8   
 9   target                7860 non-null   int8   
dtypes: float64(1), int8(2), object(7)
memory usage: 506.7+ KB
In [ ]:
#Create Dataset for 'others' i.e all groups which is not part of GRP_0
dataset2_DL_nogrp0 = dataset2_DL_US[dataset2_DL_US['New Assignment Group'] != 'GRP_0']

descending_order = dataset2_DL_nogrp0['New Assignment Group'].value_counts().sort_values(ascending=False).index
plt.subplots(figsize=(22,5))
#add code to rotate the labels
ax=sns.countplot(x='New Assignment Group', data=dataset2_DL_nogrp0, color='royalblue',order=descending_order)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
plt.tight_layout()
plt.show()
In [ ]:
maxcount = dataset2_DL_nogrp0['New Assignment Group'].value_counts().max()
maxcount
Out[ ]:
591
In [ ]:
dataset2_DL_grp0 = dataset2_DL_US[dataset2_DL_US['New Assignment Group'] == 'GRP_0']
In [ ]:
dataset2_DL_grp0.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 3660 entries, 0 to 7857
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Short description     3660 non-null   object 
 1   Description           3660 non-null   object 
 2   Caller                3660 non-null   object 
 3   Assignment group      3660 non-null   object 
 4   New Assignment Group  3660 non-null   object 
 5   combined_description  3660 non-null   object 
 6   pred_group            0 non-null      float64
 7   Language              3660 non-null   object 
 8   group                 3660 non-null   int8   
 9   target                3660 non-null   int8   
dtypes: float64(1), int8(2), object(7)
memory usage: 264.5+ KB
In [ ]:
## Treat the imbalance in the dataset by resampling to 591

dataset2_DL_nogrp0_upsampled = dataset2_DL_nogrp0[0:0]
for grp in dataset2_DL_nogrp0['New Assignment Group'].unique():
    dataset2_DL_nogrp0_grp = dataset2_DL_nogrp0[dataset2_DL_nogrp0['New Assignment Group'] == grp]
    resampled = resample(dataset2_DL_nogrp0_grp, replace=True, n_samples=int(maxcount), random_state=123)
    dataset2_DL_nogrp0_upsampled = dataset2_DL_nogrp0_upsampled.append(resampled)

dataset2_DL_upsampled=pd.concat([dataset2_DL_nogrp0_upsampled, dataset2_DL_grp0],ignore_index=True)
descending_order = dataset2_DL_upsampled['New Assignment Group'].value_counts().sort_values(ascending=False).index
plt.subplots(figsize=(22,5))
#add code to rotate the labels
ax=sns.countplot(x='New Assignment Group', data=dataset2_DL_upsampled, color='royalblue')
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
plt.tight_layout()
plt.show()
In [ ]:
# Create a column for target (assignment group) variable
dataset2_DL_upsampled['group'] = dataset2_DL_upsampled['New Assignment Group'].str[4:]
#from sklearn. preprocessing import LabelEncoder
#le = LabelEncoder()
#dataset2['group'] = le. fit_transform(dataset2['group'])
dataset2_DL_upsampled['group'] = dataset2_DL_upsampled['group'].astype('int8')
dataset2_DL_upsampled['target'] = dataset2_DL_upsampled['group'].astype('category').cat.codes

dataset2_DL_upsampled.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31437 entries, 0 to 31436
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Short description     31437 non-null  object 
 1   Description           31437 non-null  object 
 2   Caller                31437 non-null  object 
 3   Assignment group      31437 non-null  object 
 4   New Assignment Group  31437 non-null  object 
 5   combined_description  31437 non-null  object 
 6   pred_group            0 non-null      float64
 7   Language              31437 non-null  object 
 8   group                 31437 non-null  int8   
 9   target                31437 non-null  int8   
dtypes: float64(1), int8(2), object(7)
memory usage: 2.0+ MB
In [ ]:
dataset2_DL_upsampled.groupby(["group", "target"]).size()
Out[ ]:
group  target
0      0         3660
1      1          591
2      2          591
3      3          591
4      4          591
5      5          591
6      6          591
7      7          591
8      8          591
9      9          591
10     10         591
11     11         591
12     12         591
13     13         591
14     14         591
15     15         591
16     16         591
17     17         591
18     18         591
19     19         591
20     20         591
21     21         591
22     22         591
24     23         591
25     24         591
26     25         591
27     26         591
28     27         591
29     28         591
30     29         591
31     30         591
33     31         591
34     32         591
36     33         591
37     34         591
39     35         591
40     36         591
41     37         591
42     38         591
44     39         591
45     40         591
47     41         591
48     42         591
50     43         591
53     44         591
60     45         591
62     46         591
99     47         591
dtype: int64
In [ ]:
max_features = 10000
maxlen = 300
embedding_size = 200
In [ ]:
tokenizer = Tokenizer(num_words = max_features)
tokenizer.fit_on_texts(list(dataset2_DL_upsampled['combined_description']))
X = tokenizer.texts_to_sequences(dataset2_DL_upsampled['combined_description'])
In [ ]:
X = pad_sequences(X, maxlen = maxlen)
Y = np.asarray(dataset2_DL_upsampled['target'])
#print sample headline and lable
print(f'\nsample headline:\n{X[0]}\n\n Label of sample headline: {Y[0]}')
sample headline:
[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
   33  432    2 1232 1188    3  116   57   32 3698  414   33    2    1
  116   57 1232 1188   22    2   80 2831 3699 3700  955  318   35   33
  438    2   29 1128   11  959  574   12   71 3701 3636 2649   25  275
    2  959   15  117  801   16    4 1308    7 3247    2    1  123  248
    5  262    2   29  801 1927]

 Label of sample headline: 1
In [ ]:
tokenizer.word_index
Out[ ]:
{'the': 1,
 'to': 2,
 'in': 3,
 'is': 4,
 'and': 5,
 'job': 6,
 'a': 7,
 'not': 8,
 'for': 9,
 'on': 10,
 'please': 11,
 'i': 12,
 'at': 13,
 'of': 14,
 'this': 15,
 'it': 16,
 'erp': 17,
 'tool': 18,
 'no': 19,
 'from': 20,
 'scheduler': 21,
 'hostname': 22,
 'with': 23,
 'password': 24,
 'are': 25,
 'you': 26,
 'failed': 27,
 'company': 28,
 'be': 29,
 'error': 30,
 'sid': 31,
 'user': 32,
 'access': 33,
 'we': 34,
 'that': 35,
 'have': 36,
 'can': 37,
 'e': 38,
 'issue': 39,
 'as': 40,
 'my': 41,
 'person': 42,
 'unable': 43,
 'reset': 44,
 'c': 45,
 'system': 46,
 'd': 47,
 'need': 48,
 'ticket': 49,
 'help': 50,
 'or': 51,
 'working': 52,
 'id': 53,
 'when': 54,
 'by': 55,
 'account': 56,
 'plant': 57,
 'has': 58,
 'crm': 59,
 'f': 60,
 'but': 61,
 'if': 62,
 'message': 63,
 'see': 64,
 'email': 65,
 'event': 66,
 'new': 67,
 'an': 68,
 'usa': 69,
 'below': 70,
 'am': 71,
 'server': 72,
 'was': 73,
 'name': 74,
 'cannot': 75,
 'order': 76,
 'all': 77,
 'outlook': 78,
 'number': 79,
 'do': 80,
 'data': 81,
 'customer': 82,
 'ip': 83,
 'engineering': 84,
 'phone': 85,
 'na': 86,
 'printer': 87,
 'me': 88,
 'does': 89,
 'space': 90,
 'up': 91,
 'team': 92,
 'yes': 93,
 'pm': 94,
 'x': 95,
 'down': 96,
 'check': 97,
 'out': 98,
 'network': 99,
 'device': 100,
 'login': 101,
 'com': 102,
 'will': 103,
 'b': 104,
 'change': 105,
 'time': 106,
 'your': 107,
 'attached': 108,
 'hi': 109,
 'information': 110,
 'delivery': 111,
 'there': 112,
 'work': 113,
 'mail': 114,
 'using': 115,
 'us': 116,
 'connection': 117,
 'been': 118,
 'sales': 119,
 'get': 120,
 'computer': 121,
 'able': 122,
 'production': 123,
 'log': 124,
 'any': 125,
 'problem': 126,
 'available': 127,
 'pc': 128,
 'open': 129,
 'request': 130,
 'management': 131,
 'uacyltoe': 132,
 'file': 133,
 'create': 134,
 'since': 135,
 'over': 136,
 'screen': 137,
 'update': 138,
 'after': 139,
 'mm': 140,
 'contact': 141,
 'what': 142,
 'best': 143,
 'report': 144,
 'group': 145,
 'via': 146,
 'only': 147,
 'our': 148,
 'also': 149,
 'which': 150,
 'tcp': 151,
 'address': 152,
 's': 153,
 'inside': 154,
 'vendor': 155,
 'following': 156,
 'platform': 157,
 'code': 158,
 'source': 159,
 'manager': 160,
 'other': 161,
 'vpn': 162,
 'call': 163,
 'collaboration': 164,
 'application': 165,
 'status': 166,
 'skype': 167,
 'print': 168,
 'connect': 169,
 'windows': 170,
 "can't": 171,
 'users': 172,
 'et': 173,
 'type': 174,
 'germany': 175,
 'may': 176,
 'one': 177,
 'locked': 178,
 'volume': 179,
 'these': 180,
 'so': 181,
 '–': 182,
 'could': 183,
 'should': 184,
 'consumed': 185,
 'site': 186,
 'would': 187,
 'same': 188,
 'inwarehouse': 189,
 'issues': 190,
 'created': 191,
 'host': 192,
 'passwords': 193,
 'power': 194,
 'sep': 195,
 'summary': 196,
 'g': 197,
 'note': 198,
 'list': 199,
 'hxgaycze': 200,
 'material': 201,
 'priority': 202,
 'add': 203,
 'machine': 204,
 'some': 205,
 'now': 206,
 'software': 207,
 'received': 208,
 'possible': 209,
 'label': 210,
 'he': 211,
 'reporting': 212,
 'asa': 213,
 'they': 214,
 'notification': 215,
 'laptop': 216,
 're': 217,
 'hr': 218,
 'like': 219,
 'emails': 220,
 'problems': 221,
 'use': 222,
 'co': 223,
 'blocked': 224,
 'office': 225,
 'http': 226,
 'ms': 227,
 'eu': 228,
 'mii': 229,
 'know': 230,
 'why': 231,
 'into': 232,
 'alerts': 233,
 'port': 234,
 'details': 235,
 'internal': 236,
 'portal': 237,
 'display': 238,
 'service': 239,
 'getting': 240,
 'dear': 241,
 'high': 242,
 'interface': 243,
 'cost': 244,
 'files': 245,
 'set': 246,
 'folder': 247,
 'process': 248,
 'calls': 249,
 'back': 250,
 'internet': 251,
 'showing': 252,
 'microsoft': 253,
 'being': 254,
 'make': 255,
 'center': 256,
 'let': 257,
 'found': 258,
 'destination': 259,
 'more': 260,
 'hana': 261,
 'needs': 262,
 'employee': 263,
 'unlock': 264,
 'enter': 265,
 'due': 266,
 'still': 267,
 'urgent': 268,
 'per': 269,
 'product': 270,
 'business': 271,
 'attachment': 272,
 'batch': 273,
 'did': 274,
 'required': 275,
 'warning': 276,
 'page': 277,
 'view': 278,
 'telephone': 279,
 'src': 280,
 'dst': 281,
 'location': 282,
 'find': 283,
 'trying': 284,
 'java': 285,
 'sent': 286,
 'action': 287,
 'how': 288,
 'used': 289,
 'missing': 290,
 'look': 291,
 'orders': 292,
 'install': 293,
 'nwfodmhc': 294,
 'send': 295,
 'exurcwkm': 296,
 'september': 297,
 'agent': 298,
 'pcap': 299,
 'app': 300,
 'explorer': 301,
 'exe': 302,
 'm': 303,
 'ap': 304,
 'screenshot': 305,
 'events': 306,
 'start': 307,
 'review': 308,
 'document': 309,
 'different': 310,
 'programdnty': 311,
 'security': 312,
 'traffic': 313,
 'domain': 314,
 'outbound': 315,
 'r': 316,
 'assignments': 317,
 'today': 318,
 'language': 319,
 'hxgayczeing': 320,
 'august': 321,
 'then': 322,
 'pls': 323,
 'again': 324,
 'pdf': 325,
 'his': 326,
 'running': 327,
 'client': 328,
 'search': 329,
 'version': 330,
 'active': 331,
 'full': 332,
 'backup': 333,
 'link': 334,
 'switch': 335,
 'printing': 336,
 'date': 337,
 'slow': 338,
 'ad': 339,
 'tried': 340,
 'dev': 341,
 'content': 342,
 'drive': 343,
 'support': 344,
 'item': 345,
 'deny': 346,
 'advise': 347,
 'circuit': 348,
 'profit': 349,
 'just': 350,
 'acl': 351,
 'show': 352,
 'very': 353,
 'count': 354,
 'related': 355,
 'were': 356,
 'outage': 357,
 'h': 358,
 'october': 359,
 'multiple': 360,
 'resolve': 361,
 'while': 362,
 'sincerely': 363,
 'provide': 364,
 'changes': 365,
 'remote': 366,
 'communication': 367,
 'expense': 368,
 'browser': 369,
 'got': 370,
 'receiving': 371,
 'run': 372,
 'connected': 373,
 'n': 374,
 'complete': 375,
 'having': 376,
 'last': 377,
 'fix': 378,
 'delete': 379,
 'automatically': 380,
 'under': 381,
 'area': 382,
 'field': 383,
 'sql': 384,
 'u': 385,
 'purchasing': 386,
 'evening': 387,
 'correct': 388,
 'admin': 389,
 'without': 390,
 'meeting': 391,
 'telecom': 392,
 'updated': 393,
 'global': 394,
 'da': 395,
 "'": 396,
 'her': 397,
 'inc': 398,
 'output': 399,
 'certificate': 400,
 'outside': 401,
 'dsw': 402,
 'than': 403,
 'well': 404,
 'external': 405,
 'ticketing': 406,
 'try': 407,
 'vid': 408,
 'ex': 409,
 'ab': 410,
 'because': 411,
 'handling': 412,
 'plm': 413,
 'had': 414,
 'defective': 415,
 'shows': 416,
 'locky': 417,
 'post': 418,
 'through': 419,
 'wrong': 420,
 'sw': 421,
 'disk': 422,
 'kindly': 423,
 'ship': 424,
 'card': 425,
 'incident': 426,
 'failure': 427,
 'symantec': 428,
 'assigned': 429,
 'dn': 430,
 "doesn't": 431,
 'needed': 432,
 'fail': 433,
 "it's": 434,
 'example': 435,
 "i'm": 436,
 'monitoring': 437,
 'seems': 438,
 'two': 439,
 'about': 440,
 'submit': 441,
 'incidents': 442,
 'changed': 443,
 'desk': 444,
 'datacenter': 445,
 'p': 446,
 'accounts': 447,
 'default': 448,
 'copy': 449,
 'download': 450,
 'numbers': 451,
 'both': 452,
 'maintenance': 453,
 'go': 454,
 'investigate': 455,
 'form': 456,
 'where': 457,
 'po': 458,
 'o': 459,
 'questions': 460,
 'monitor': 461,
 'processing': 462,
 'jobs': 463,
 'longer': 464,
 'fe': 465,
 'before': 466,
 'evened': 467,
 'who': 468,
 'refer': 469,
 'gso': 470,
 'win': 471,
 'warehouse': 472,
 'vip': 473,
 'drawing': 474,
 'apac': 475,
 'even': 476,
 'ascii': 477,
 'hex': 478,
 'already': 479,
 'mobile': 480,
 'sto': 481,
 'setup': 482,
 'times': 483,
 'line': 484,
 'during': 485,
 'sys': 486,
 'centers': 487,
 'excel': 488,
 'dashbankrd': 489,
 'ignore': 490,
 'correctly': 491,
 'model': 492,
 'she': 493,
 'friday': 494,
 'explicit': 495,
 'impact': 496,
 'shop': 497,
 'shot': 498,
 'options': 499,
 'pieces': 500,
 'transaction': 501,
 'items': 502,
 'database': 503,
 'receive': 504,
 'them': 505,
 'transfer': 506,
 'aug': 507,
 'morning': 508,
 'tuesday': 509,
 'south': 510,
 't': 511,
 'save': 512,
 'th': 513,
 'stock': 514,
 'alert': 515,
 'their': 516,
 'total': 517,
 'virus': 518,
 'detail': 519,
 'added': 520,
 'pgi': 521,
 'control': 522,
 'button': 523,
 'too': 524,
 'another': 525,
 'activity': 526,
 'sinkhole': 527,
 'fine': 528,
 'de': 529,
 'msd': 530,
 'old': 531,
 'rule': 532,
 'prod': 533,
 'day': 534,
 'net': 535,
 'reason': 536,
 'floor': 537,
 'located': 538,
 'desktop': 539,
 'threshold': 540,
 'quote': 541,
 'reports': 542,
 'ie': 543,
 'block': 544,
 'customers': 545,
 'hello': 546,
 'completed': 547,
 'each': 548,
 'inspector': 549,
 'finance': 550,
 'tax': 551,
 'further': 552,
 'tools': 553,
 'scheduled': 554,
 'response': 555,
 'sender': 556,
 'payroll': 557,
 'em': 558,
 'bk': 559,
 'project': 560,
 'confirmed': 561,
 'either': 562,
 'opening': 563,
 'none': 564,
 'days': 565,
 'profile': 566,
 'started': 567,
 'cold': 568,
 'its': 569,
 'documents': 570,
 'audio': 571,
 'engine': 572,
 'workflow': 573,
 'aerp': 574,
 'handle': 575,
 'calling': 576,
 'several': 577,
 'coming': 578,
 'step': 579,
 'soc': 580,
 'assign': 581,
 'z': 582,
 'function': 583,
 'approve': 584,
 'home': 585,
 'versions': 586,
 'image': 587,
 'someone': 588,
 'currently': 589,
 'hotf': 590,
 'future': 591,
 'indicate': 592,
 'generating': 593,
 'local': 594,
 'packet': 595,
 'printed': 596,
 'inbound': 597,
 'checked': 598,
 'between': 599,
 'magento': 600,
 'web': 601,
 'www': 602,
 'url': 603,
 'properly': 604,
 'many': 605,
 'notified': 606,
 'month': 607,
 'systems': 608,
 'forward': 609,
 'krcscfpry': 610,
 'boot': 611,
 'part': 612,
 'approved': 613,
 'value': 614,
 'ft': 615,
 'wifi': 616,
 'reported': 617,
 'escalation': 618,
 'select': 619,
 'blank': 620,
 'pl': 621,
 'end': 622,
 'within': 623,
 'additional': 624,
 'india': 625,
 'cert': 626,
 'dell': 627,
 'click': 628,
 'done': 629,
 'anymore': 630,
 'tickets': 631,
 'few': 632,
 'restart': 633,
 'sure': 634,
 'good': 635,
 'deleted': 636,
 'method': 637,
 'corresponding': 638,
 'technical': 639,
 'pos': 640,
 'interaction': 641,
 'want': 642,
 'fw': 643,
 'incorrect': 644,
 'nothing': 645,
 'pcs': 646,
 'seeing': 647,
 'directly': 648,
 'plan': 649,
 'ping': 650,
 'supply': 651,
 'maint': 652,
 'injection': 653,
 'concerns': 654,
 'mfg': 655,
 'solve': 656,
 'room': 657,
 'right': 658,
 'node': 659,
 'k': 660,
 'amount': 661,
 'delegating': 662,
 'attachments': 663,
 'every': 664,
 'online': 665,
 'sync': 666,
 'messages': 667,
 'free': 668,
 'above': 669,
 'generated': 670,
 'always': 671,
 'qty': 672,
 'off': 673,
 'contacts': 674,
 'opportunities': 675,
 'installed': 676,
 'pro': 677,
 'drawings': 678,
 'ce': 679,
 'gsc': 680,
 'upload': 681,
 'appears': 682,
 'instead': 683,
 'renew': 684,
 'entered': 685,
 'provider': 686,
 'j': 687,
 'applications': 688,
 'kind': 689,
 'pull': 690,
 'input': 691,
 'servers': 692,
 'v': 693,
 'escalating': 694,
 'purposes': 695,
 'capacity': 696,
 'scan': 697,
 'w': 698,
 'yesterday': 699,
 'next': 700,
 'affected': 701,
 'thank': 702,
 'logging': 703,
 'malware': 704,
 'return': 705,
 'however': 706,
 'medium': 707,
 'distribution': 708,
 'zsd': 709,
 'hp': 710,
 'remove': 711,
 'fixed': 712,
 'current': 713,
 'occurrence': 714,
 'price': 715,
 'here': 716,
 'employees': 717,
 'userid': 718,
 'mb': 719,
 'edit': 720,
 'load': 721,
 'agents': 722,
 "don't": 723,
 'differently': 724,
 'isensor': 725,
 'once': 726,
 'dc': 727,
 'routing': 728,
 'long': 729,
 'confirmation': 730,
 'results': 731,
 'complaint': 732,
 'settings': 733,
 'repair': 734,
 'allow': 735,
 'bobj': 736,
 'correlation': 737,
 'shipping': 738,
 'replace': 739,
 'mp': 740,
 'something': 741,
 'iphone': 742,
 'box': 743,
 'primary': 744,
 'assistance': 745,
 'creating': 746,
 'installation': 747,
 'planned': 748,
 'bank': 749,
 'amerirtca': 750,
 'infected': 751,
 'wednesday': 752,
 'cc': 753,
 'going': 754,
 'df': 755,
 'put': 756,
 'query': 757,
 'appearing': 758,
 'username': 759,
 'shown': 760,
 'opportstorage': 761,
 'indicator': 762,
 'blue': 763,
 'description': 764,
 'directory': 765,
 'top': 766,
 'cancel': 767,
 'works': 768,
 'ctoc': 769,
 'null': 770,
 'most': 771,
 'supervisor': 772,
 'objects': 773,
 'teams': 774,
 'logged': 775,
 'services': 776,
 'jul': 777,
 'recipient': 778,
 'such': 779,
 'read': 780,
 'spam': 781,
 'gb': 782,
 'csd': 783,
 'udp': 784,
 'utc': 785,
 'facing': 786,
 'important': 787,
 'release': 788,
 'approval': 789,
 'training': 790,
 'gr': 791,
 'org': 792,
 'reference': 793,
 'duration': 794,
 'against': 795,
 'fax': 796,
 'necessary': 797,
 'original': 798,
 'close': 799,
 'departments': 800,
 'immediately': 801,
 'relay': 802,
 'sending': 803,
 'sir': 804,
 'write': 805,
 'forecast': 806,
 'path': 807,
 'operator': 808,
 'average': 809,
 'attempt': 810,
 'upgrade': 811,
 'least': 812,
 'dial': 813,
 'billing': 814,
 'hub': 815,
 'wireless': 816,
 'exists': 817,
 'operation': 818,
 'dynamics': 819,
 'asking': 820,
 'procedure': 821,
 'route': 822,
 'shipment': 823,
 'sms': 824,
 'procedures': 825,
 'point': 826,
 'classification': 827,
 'ee': 828,
 'performance': 829,
 'extended': 830,
 'extend': 831,
 'samples': 832,
 'tooltion': 833,
 'inplant': 834,
 'sogou': 835,
 'hours': 836,
 'exchange': 837,
 'task': 838,
 'dhcpd': 839,
 'dhcpack': 840,
 'eth': 841,
 'lease': 842,
 'near': 843,
 'comes': 844,
 'dns': 845,
 'bw': 846,
 'logic': 847,
 'protocol': 848,
 'called': 849,
 'opened': 850,
 'rqfhiong': 851,
 'case': 852,
 'partner': 853,
 'requests': 854,
 'expired': 855,
 'keybankrd': 856,
 'score': 857,
 'protection': 858,
 'requested': 859,
 'component': 860,
 'pricing': 861,
 'personal': 862,
 'mage': 863,
 'adminhtml': 864,
 'widget': 865,
 "'admin": 866,
 "switch'": 867,
 'directionality': 868,
 'scwx': 869,
 'sherlock': 870,
 'sle': 871,
 'dat': 872,
 'caller': 873,
 'chg': 874,
 'assignment': 875,
 'cvss': 876,
 'existing': 877,
 'oracle': 878,
 'size': 879,
 'accept': 880,
 '·': 881,
 'comments': 882,
 'sign': 883,
 'basis': 884,
 'empty': 885,
 'intended': 886,
 'solution': 887,
 'powder': 888,
 'processed': 889,
 'give': 890,
 'hrp': 891,
 'amar': 892,
 'moved': 893,
 'ecc': 894,
 'yet': 895,
 'siemens': 896,
 'firewall': 897,
 'etc': 898,
 'assist': 899,
 'resources': 900,
 'says': 901,
 'confirm': 902,
 'ef': 903,
 'mailbox': 904,
 'mouse': 905,
 'administrator': 906,
 'resolved': 907,
 'android': 908,
 'php': 909,
 'critical': 910,
 'monday': 911,
 'hq': 912,
 'detected': 913,
 'made': 914,
 'happened': 915,
 'enable': 916,
 'dmprmb': 917,
 'failagain': 918,
 'him': 919,
 'week': 920,
 'must': 921,
 'malicious': 922,
 'verified': 923,
 'ess': 924,
 'regarding': 925,
 'license': 926,
 'paper': 927,
 'abended': 928,
 'jcoerpmanager': 929,
 'omforiginalsexport': 930,
 'costs': 931,
 'npc': 932,
 'owner': 933,
 'first': 934,
 'errors': 935,
 'vulnerability': 936,
 'prohibited': 937,
 'sp': 938,
 'left': 939,
 'accessing': 940,
 'rma': 941,
 'those': 942,
 'come': 943,
 'take': 944,
 'tos': 945,
 'notes': 946,
 'mails': 947,
 'selected': 948,
 'record': 949,
 'records': 950,
 'table': 951,
 'website': 952,
 'mentioned': 953,
 'year': 954,
 'daily': 955,
 'until': 956,
 'reinstall': 957,
 'eng': 958,
 'restore': 959,
 'ora': 960,
 'vlan': 961,
 'ransomware': 962,
 'teamviewer': 963,
 'repeat': 964,
 'ff': 965,
 'verify': 966,
 'visible': 967,
 'public': 968,
 'cancelled': 969,
 'displayed': 970,
 'mit': 971,
 'confidential': 972,
 "tcp'": 973,
 'reach': 974,
 'fd': 975,
 'director': 976,
 'disclosure': 977,
 'inventory': 978,
 'sensor': 979,
 'db': 980,
 'quality': 981,
 'doc': 982,
 'think': 983,
 'recently': 984,
 'provided': 985,
 'non': 986,
 'notify': 987,
 'compromised': 988,
 'lost': 989,
 'determine': 990,
 'driver': 991,
 'occurred': 992,
 'analysis': 993,
 'fields': 994,
 'submitted': 995,
 'credential': 996,
 'anything': 997,
 'including': 998,
 'deletion': 999,
 'qa': 1000,
 ...}
In [ ]:
word_index= tokenizer.word_index
In [ ]:
num_words = len(tokenizer.word_index) + 1
print(num_words)
11703
In [ ]:
# # Mounting Google Drive
# from google.colab import drive
# drive.mount('/content/drive')
In [ ]:
#Extract Glove embedding zip file
#from zipfile import ZipFile
# with ZipFile('/content/drive/MyDrive/Great_Learning/archive.zip', 'r') as z:
#   z.extractall()
In [ ]:
EMBEDDING_FILE = '/content/drive/MyDrive/Great_Learning/NLP_sarcasm_detection/glove.6B.200d.txt'

embeddings = {}
for o in open(EMBEDDING_FILE):
    word = o.split(" ")[0]
    # print(word)
    embd = o.split(" ")[1:]
    embd = np.asarray(embd, dtype='float32')
    # print(embd)
    embeddings[word] = embd

# load the whole embedding into memory
#embeddings_index = dict()
#f = open(project_path+'word2vec_vector.txt')

#for line in f:
#	values = line.split()
#	word = values[0]
#	coefs = np.asarray(values[1:], dtype='float32')
#	embeddings_index[word] = coefs
#f.close()
#print('Loaded %s word vectors.' % len(embeddings_index))

# create a weight matrix for words in training docs
embedding_matrix = np.zeros((num_words, 200))

for word, i in tokenizer.word_index.items():
	embedding_vector = embeddings.get(word)
	if embedding_vector is not None:
		embedding_matrix[i] = embedding_vector
In [ ]:
# Create training and test datasets with 80:20 ratio
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state = SEED, shuffle = True)
print('\033[1mShape of the training set:\033[0m', x_train.shape, y_train.shape)
print('\033[1mShape of the test set:\033[0m', x_test.shape, y_test.shape)

print(f'\nNumber of rows in training dataset: {x_train.shape[0]}')
print(f'Number of columns in training dataset: {x_train.shape[1]}')
print(f'Number of unique words in training dataset: {len(np.unique(np.hstack(x_train)))}')

print(f'\nNumber of rows in test dataset: {x_test.shape[0]}')
print(f'Number of columns in test dataset: {x_test.shape[1]}')
print(f'Number of unique words in test dataset: {len(np.unique(np.hstack(x_test)))}')
Shape of the training set: (25149, 300) (25149,)
Shape of the test set: (6288, 300) (6288,)

Number of rows in training dataset: 25149
Number of columns in training dataset: 300
Number of unique words in training dataset: 9549

Number of rows in test dataset: 6288
Number of columns in test dataset: 300
Number of unique words in test dataset: 7518
In [ ]:
num_class = len(np.unique(y_train))
num_class
Out[ ]:
48
In [ ]:
num_class_test = len(np.unique(y_test))
num_class_test
Out[ ]:
48
In [ ]:
num_class_all = len(np.unique(dataset2_DL_upsampled['group'].values))
num_class_all
Out[ ]:
48
In [ ]:
# converting y data into categorical (one-hot encoding)
ytrain = to_categorical(y_train,48)
ytest = to_categorical(y_test,48)
In [ ]:
print(f'\nsample headline:\n{x_train[50]}\n\n Label of sample headline: {ytrain[50]}\n\n Label of sample headline: {y_train[50]}')
sample headline:
[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0   33    2  218   18
 1024   33    2  218   18 1024]

 Label of sample headline: [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 Label of sample headline: 2
In [ ]:
input_layer = Input(shape=(maxlen,),dtype=tf.int64)
embed = Embedding(num_words,output_dim=200,input_length=maxlen,weights=[embedding_matrix], trainable=True)(input_layer)  #weights=[embedding_matrix]
lstm= LSTM(128)(embed)
drop=Dropout(0.3)(lstm)
dense =Dense(100,activation='relu')(drop)
out=Dense(num_class,activation='softmax')(dense)

model_lstm= Model(input_layer,out)
model_lstm.compile(loss='categorical_crossentropy',optimizer="adam",metrics=['accuracy'])

model_lstm.summary()
tf.keras.utils.plot_model(model_lstm, show_shapes = True)
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_2 (InputLayer)        [(None, 300)]             0         
                                                                 
 embedding_1 (Embedding)     (None, 300, 200)          2340600   
                                                                 
 lstm_1 (LSTM)               (None, 128)               168448    
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_2 (Dense)             (None, 100)               12900     
                                                                 
 dense_3 (Dense)             (None, 48)                4848      
                                                                 
=================================================================
Total params: 2,526,796
Trainable params: 2,526,796
Non-trainable params: 0
_________________________________________________________________
Out[ ]:
In [ ]:
es = EarlyStopping(monitor='val_accuracy', mode = 'auto', verbose = 1, patience = 5)  
mc = ModelCheckpoint('model-{epoch:03d}-{val_accuracy:03f}.h5', verbose=1, monitor='val_accuracy',save_best_only=True, mode='auto')
lr_reduction = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience=2, min_lr=0.0001)
In [ ]:
batch_size = 100
epochs = 10
model_lstm_history = model_lstm.fit(x_train,ytrain,batch_size=batch_size, epochs=epochs, 
                                callbacks=[es,mc,lr_reduction], validation_data = (x_test, ytest),verbose=True)
Epoch 1/10
251/252 [============================>.] - ETA: 0s - loss: 2.5652 - accuracy: 0.3347
Epoch 00001: val_accuracy improved from -inf to 0.60226, saving model to model-001-0.602258.h5
252/252 [==============================] - 12s 30ms/step - loss: 2.5631 - accuracy: 0.3352 - val_loss: 1.4336 - val_accuracy: 0.6023 - lr: 0.0010
Epoch 2/10
251/252 [============================>.] - ETA: 0s - loss: 1.0235 - accuracy: 0.7097
Epoch 00002: val_accuracy improved from 0.60226 to 0.81791, saving model to model-002-0.817907.h5
252/252 [==============================] - 7s 28ms/step - loss: 1.0229 - accuracy: 0.7098 - val_loss: 0.6330 - val_accuracy: 0.8179 - lr: 0.0010
Epoch 3/10
251/252 [============================>.] - ETA: 0s - loss: 0.5176 - accuracy: 0.8495
Epoch 00003: val_accuracy improved from 0.81791 to 0.88104, saving model to model-003-0.881043.h5
252/252 [==============================] - 7s 28ms/step - loss: 0.5179 - accuracy: 0.8495 - val_loss: 0.4006 - val_accuracy: 0.8810 - lr: 0.0010
Epoch 4/10
251/252 [============================>.] - ETA: 0s - loss: 0.3443 - accuracy: 0.8982
Epoch 00004: val_accuracy improved from 0.88104 to 0.90776, saving model to model-004-0.907761.h5
252/252 [==============================] - 7s 29ms/step - loss: 0.3440 - accuracy: 0.8983 - val_loss: 0.3131 - val_accuracy: 0.9078 - lr: 0.0010
Epoch 5/10
250/252 [============================>.] - ETA: 0s - loss: 0.2581 - accuracy: 0.9222
Epoch 00005: val_accuracy improved from 0.90776 to 0.92017, saving model to model-005-0.920165.h5
252/252 [==============================] - 7s 29ms/step - loss: 0.2586 - accuracy: 0.9221 - val_loss: 0.2698 - val_accuracy: 0.9202 - lr: 0.0010
Epoch 6/10
251/252 [============================>.] - ETA: 0s - loss: 0.2197 - accuracy: 0.9321
Epoch 00006: val_accuracy did not improve from 0.92017
252/252 [==============================] - 7s 28ms/step - loss: 0.2198 - accuracy: 0.9320 - val_loss: 0.2987 - val_accuracy: 0.9108 - lr: 0.0010
Epoch 7/10
250/252 [============================>.] - ETA: 0s - loss: 0.1987 - accuracy: 0.9373
Epoch 00007: val_accuracy improved from 0.92017 to 0.93003, saving model to model-007-0.930025.h5
252/252 [==============================] - 7s 28ms/step - loss: 0.1989 - accuracy: 0.9372 - val_loss: 0.2462 - val_accuracy: 0.9300 - lr: 0.0010
Epoch 8/10
250/252 [============================>.] - ETA: 0s - loss: 0.1748 - accuracy: 0.9435
Epoch 00008: val_accuracy improved from 0.93003 to 0.93416, saving model to model-008-0.934160.h5
252/252 [==============================] - 7s 29ms/step - loss: 0.1754 - accuracy: 0.9433 - val_loss: 0.2323 - val_accuracy: 0.9342 - lr: 0.0010
Epoch 9/10
250/252 [============================>.] - ETA: 0s - loss: 0.1535 - accuracy: 0.9498
Epoch 00009: val_accuracy improved from 0.93416 to 0.93893, saving model to model-009-0.938931.h5
252/252 [==============================] - 7s 29ms/step - loss: 0.1534 - accuracy: 0.9498 - val_loss: 0.2268 - val_accuracy: 0.9389 - lr: 0.0010
Epoch 10/10
251/252 [============================>.] - ETA: 0s - loss: 0.1559 - accuracy: 0.9489
Epoch 00010: val_accuracy did not improve from 0.93893
252/252 [==============================] - 7s 29ms/step - loss: 0.1560 - accuracy: 0.9489 - val_loss: 0.2531 - val_accuracy: 0.9276 - lr: 0.0010
In [ ]:
# Evaluate the model & report accuracy
from keras.models import load_model
model_lstm_saved = load_model("model-009-0.938931.h5")
scores = model_lstm_saved.evaluate(x_test, ytest, batch_size = 100, verbose = 1)
print('Test accuracy: %.2f%%' % (scores[1]*100))
63/63 [==============================] - 1s 10ms/step - loss: 0.2268 - accuracy: 0.9389
Test accuracy: 93.89%
In [ ]:
model_lstm_saved.get_config()
Out[ ]:
{'input_layers': [['input_2', 0, 0]],
 'layers': [{'class_name': 'InputLayer',
   'config': {'batch_input_shape': (None, 300),
    'dtype': 'int64',
    'name': 'input_2',
    'ragged': False,
    'sparse': False},
   'inbound_nodes': [],
   'name': 'input_2'},
  {'class_name': 'Embedding',
   'config': {'activity_regularizer': None,
    'batch_input_shape': (None, 300),
    'dtype': 'float32',
    'embeddings_constraint': None,
    'embeddings_initializer': {'class_name': 'RandomUniform',
     'config': {'maxval': 0.05, 'minval': -0.05, 'seed': None}},
    'embeddings_regularizer': None,
    'input_dim': 11703,
    'input_length': 300,
    'mask_zero': False,
    'name': 'embedding_1',
    'output_dim': 200,
    'trainable': True},
   'inbound_nodes': [[['input_2', 0, 0, {}]]],
   'name': 'embedding_1'},
  {'class_name': 'LSTM',
   'config': {'activation': 'tanh',
    'activity_regularizer': None,
    'bias_constraint': None,
    'bias_initializer': {'class_name': 'Zeros',
     'config': {},
     'shared_object_id': 5},
    'bias_regularizer': None,
    'dropout': 0.0,
    'dtype': 'float32',
    'go_backwards': False,
    'implementation': 2,
    'kernel_constraint': None,
    'kernel_initializer': {'class_name': 'GlorotUniform',
     'config': {'seed': None},
     'shared_object_id': 3},
    'kernel_regularizer': None,
    'name': 'lstm_1',
    'recurrent_activation': 'sigmoid',
    'recurrent_constraint': None,
    'recurrent_dropout': 0.0,
    'recurrent_initializer': {'class_name': 'Orthogonal',
     'config': {'gain': 1.0, 'seed': None},
     'shared_object_id': 4},
    'recurrent_regularizer': None,
    'return_sequences': False,
    'return_state': False,
    'stateful': False,
    'time_major': False,
    'trainable': True,
    'unit_forget_bias': True,
    'units': 128,
    'unroll': False,
    'use_bias': True},
   'inbound_nodes': [[['embedding_1', 0, 0, {}]]],
   'name': 'lstm_1'},
  {'class_name': 'Dropout',
   'config': {'dtype': 'float32',
    'name': 'dropout_1',
    'noise_shape': None,
    'rate': 0.3,
    'seed': None,
    'trainable': True},
   'inbound_nodes': [[['lstm_1', 0, 0, {}]]],
   'name': 'dropout_1'},
  {'class_name': 'Dense',
   'config': {'activation': 'relu',
    'activity_regularizer': None,
    'bias_constraint': None,
    'bias_initializer': {'class_name': 'Zeros', 'config': {}},
    'bias_regularizer': None,
    'dtype': 'float32',
    'kernel_constraint': None,
    'kernel_initializer': {'class_name': 'GlorotUniform',
     'config': {'seed': None}},
    'kernel_regularizer': None,
    'name': 'dense_2',
    'trainable': True,
    'units': 100,
    'use_bias': True},
   'inbound_nodes': [[['dropout_1', 0, 0, {}]]],
   'name': 'dense_2'},
  {'class_name': 'Dense',
   'config': {'activation': 'softmax',
    'activity_regularizer': None,
    'bias_constraint': None,
    'bias_initializer': {'class_name': 'Zeros', 'config': {}},
    'bias_regularizer': None,
    'dtype': 'float32',
    'kernel_constraint': None,
    'kernel_initializer': {'class_name': 'GlorotUniform',
     'config': {'seed': None}},
    'kernel_regularizer': None,
    'name': 'dense_3',
    'trainable': True,
    'units': 48,
    'use_bias': True},
   'inbound_nodes': [[['dense_2', 0, 0, {}]]],
   'name': 'dense_3'}],
 'name': 'model_1',
 'output_layers': [['dense_3', 0, 0]]}
In [ ]:
#visualizing model performance  - loss and accuracy 
f, (ax1, ax2) = plt.subplots(1, 2, figsize = (15, 7.2))
f.suptitle('Monitoring the performance of the LSTM model')

ax1.plot(model_lstm_history.history['loss'], label = 'Train')
ax1.plot(model_lstm_history.history['val_loss'], label = 'Test')
ax1.set_title('Model Loss')
ax1.legend(['Train', 'Test'])

ax2.plot(model_lstm_history.history['accuracy'], label = 'Train')
ax2.plot(model_lstm_history.history['val_accuracy'], label = 'Test')
ax2.set_title('Model Accuracy')
ax2.legend(['Train', 'Test'])

plt.show()

BI-LSTM

In [ ]:
input_layer = Input(shape=(maxlen,),dtype=tf.int64)
embed = Embedding(num_words,output_dim=200,input_length=maxlen,weights=[embedding_matrix], trainable=True)(input_layer)  #weights=[embedding_matrix]
bi_lstm=Bidirectional(LSTM(128))(embed)
drop=Dropout(0.3)(bi_lstm)
dense =Dense(100,activation='relu')(drop)
out=Dense(num_class,activation='softmax')(dense)

model_bi_lstm = Model(input_layer,out)
model_bi_lstm.compile(loss='categorical_crossentropy',optimizer="adam",metrics=['accuracy'])

model_bi_lstm.summary()
tf.keras.utils.plot_model(model_bi_lstm, show_shapes = True)
Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_3 (InputLayer)        [(None, 300)]             0         
                                                                 
 embedding_2 (Embedding)     (None, 300, 200)          2340600   
                                                                 
 bidirectional_1 (Bidirectio  (None, 256)              336896    
 nal)                                                            
                                                                 
 dropout_2 (Dropout)         (None, 256)               0         
                                                                 
 dense_4 (Dense)             (None, 100)               25700     
                                                                 
 dense_5 (Dense)             (None, 48)                4848      
                                                                 
=================================================================
Total params: 2,708,044
Trainable params: 2,708,044
Non-trainable params: 0
_________________________________________________________________
Out[ ]:
In [ ]:
es = EarlyStopping(monitor='val_accuracy', mode = 'auto', verbose = 1, patience = 5)  
mc = ModelCheckpoint('model-{epoch:03d}-{val_accuracy:03f}.h5', verbose=1, monitor='val_accuracy',save_best_only=True, mode='auto')
lr_reduction = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience=2, min_lr=0.0001)
In [ ]:
batch_size = 100
epochs = 10
model_bi_lstm_history = model_bi_lstm.fit(x_train,ytrain,batch_size=batch_size, epochs=epochs, 
                                callbacks=[es,mc,lr_reduction], validation_data = (x_test, ytest),verbose=True)
Epoch 1/10
251/252 [============================>.] - ETA: 0s - loss: 2.4454 - accuracy: 0.3561
Epoch 00001: val_accuracy improved from -inf to 0.64122, saving model to model-001-0.641221.h5
252/252 [==============================] - 17s 59ms/step - loss: 2.4432 - accuracy: 0.3567 - val_loss: 1.3081 - val_accuracy: 0.6412 - lr: 0.0010
Epoch 2/10
251/252 [============================>.] - ETA: 0s - loss: 0.9477 - accuracy: 0.7359
Epoch 00002: val_accuracy improved from 0.64122 to 0.83015, saving model to model-002-0.830153.h5
252/252 [==============================] - 14s 57ms/step - loss: 0.9469 - accuracy: 0.7361 - val_loss: 0.6202 - val_accuracy: 0.8302 - lr: 0.0010
Epoch 3/10
251/252 [============================>.] - ETA: 0s - loss: 0.4857 - accuracy: 0.8615
Epoch 00003: val_accuracy improved from 0.83015 to 0.88232, saving model to model-003-0.882316.h5
252/252 [==============================] - 14s 57ms/step - loss: 0.4863 - accuracy: 0.8614 - val_loss: 0.3994 - val_accuracy: 0.8823 - lr: 0.0010
Epoch 4/10
251/252 [============================>.] - ETA: 0s - loss: 0.3493 - accuracy: 0.8979
Epoch 00004: val_accuracy improved from 0.88232 to 0.91126, saving model to model-004-0.911260.h5
252/252 [==============================] - 14s 57ms/step - loss: 0.3490 - accuracy: 0.8980 - val_loss: 0.3050 - val_accuracy: 0.9113 - lr: 0.0010
Epoch 5/10
251/252 [============================>.] - ETA: 0s - loss: 0.2605 - accuracy: 0.9232
Epoch 00005: val_accuracy improved from 0.91126 to 0.91985, saving model to model-005-0.919847.h5
252/252 [==============================] - 14s 57ms/step - loss: 0.2602 - accuracy: 0.9233 - val_loss: 0.2793 - val_accuracy: 0.9198 - lr: 0.0010
Epoch 6/10
251/252 [============================>.] - ETA: 0s - loss: 0.2097 - accuracy: 0.9359
Epoch 00006: val_accuracy did not improve from 0.91985
252/252 [==============================] - 14s 57ms/step - loss: 0.2097 - accuracy: 0.9359 - val_loss: 0.2787 - val_accuracy: 0.9192 - lr: 0.0010
Epoch 7/10
251/252 [============================>.] - ETA: 0s - loss: 0.1887 - accuracy: 0.9412
Epoch 00007: val_accuracy improved from 0.91985 to 0.93257, saving model to model-007-0.932570.h5
252/252 [==============================] - 14s 57ms/step - loss: 0.1889 - accuracy: 0.9411 - val_loss: 0.2394 - val_accuracy: 0.9326 - lr: 0.0010
Epoch 8/10
251/252 [============================>.] - ETA: 0s - loss: 0.1928 - accuracy: 0.9394
Epoch 00008: val_accuracy did not improve from 0.93257
252/252 [==============================] - 14s 56ms/step - loss: 0.1929 - accuracy: 0.9394 - val_loss: 0.2465 - val_accuracy: 0.9313 - lr: 0.0010
Epoch 9/10
251/252 [============================>.] - ETA: 0s - loss: 0.1581 - accuracy: 0.9491
Epoch 00009: val_accuracy improved from 0.93257 to 0.93559, saving model to model-009-0.935592.h5
252/252 [==============================] - 14s 57ms/step - loss: 0.1579 - accuracy: 0.9492 - val_loss: 0.2339 - val_accuracy: 0.9356 - lr: 0.0010
Epoch 10/10
251/252 [============================>.] - ETA: 0s - loss: 0.1523 - accuracy: 0.9508
Epoch 00010: val_accuracy did not improve from 0.93559
252/252 [==============================] - 14s 57ms/step - loss: 0.1520 - accuracy: 0.9509 - val_loss: 0.2460 - val_accuracy: 0.9283 - lr: 0.0010
In [ ]:
# Evaluate the model & report accuracy
from keras.models import load_model
model_bi_lstm_saved = load_model("model-009-0.935592.h5")
scores = model_bi_lstm_saved.evaluate(x_test, ytest, batch_size = 100, verbose = 1)
print('Test accuracy: %.2f%%' % (scores[1]*100))
63/63 [==============================] - 2s 20ms/step - loss: 0.2339 - accuracy: 0.9356
Test accuracy: 93.56%
In [ ]:
#visualizing model performance  - loss and accuracy 
f, (ax1, ax2) = plt.subplots(1, 2, figsize = (15, 7.2))
f.suptitle('Monitoring the performance of the Bi-LSTM model')

ax1.plot(model_bi_lstm_history.history['loss'], label = 'Train')
ax1.plot(model_bi_lstm_history.history['val_loss'], label = 'Test')
ax1.set_title('Model Loss')
ax1.legend(['Train', 'Test'])

ax2.plot(model_bi_lstm_history.history['accuracy'], label = 'Train')
ax2.plot(model_bi_lstm_history.history['val_accuracy'], label = 'Test')
ax2.set_title('Model Accuracy')
ax2.legend(['Train', 'Test'])

plt.show()

Recurrent Neural Networks (RNN) RNN assigns more weights to the previous data points of sequence. Therefore, this technique is a powerful method for text, string and sequential data classification. Moreover, this technique could be used for image classification as we did in this work. In RNN, the neural net considers the information of previous nodes in a very sophisticated method which allows for better semantic analysis of the structures in the dataset.

Gated Recurrent Unit (GRU)

Gated Recurrent Unit (GRU) is a gating mechanism for RNN which was introduced by J. Chung et al. and K.Cho et al.. GRU is a simplified variant of the LSTM architecture, but there are differences as follows: GRU contains two gates and does not possess any internal memory. Second, non-linearity is not applied.

In [ ]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, TimeDistributed, Activation
from keras.layers import Flatten, Permute, merge, Input
from keras.layers import Embedding
from keras.models import Model
from keras.layers import Input, Dense, multiply, concatenate, Dropout
from keras.layers import GRU, Bidirectional
In [ ]:
input_layer = Input(shape=(maxlen,),dtype=tf.int64)
embed = Embedding(num_words,output_dim=200,input_length=maxlen,weights=[embedding_matrix], trainable=True)(input_layer) #weights=[embedding_matrix]
gru= GRU(128)(embed)
drop=Dropout(0.3)(gru)
dense =Dense(100,activation='relu')(drop)
out=Dense(len((pd.Series(y_train)).unique()),activation='softmax')(dense)   

model_GRU = Model(input_layer,out)
model_GRU.compile(loss='categorical_crossentropy',optimizer="adam",metrics=['accuracy'])

model_GRU.summary()
tf.keras.utils.plot_model(model_GRU, show_shapes = True)
Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_4 (InputLayer)        [(None, 300)]             0         
                                                                 
 embedding_3 (Embedding)     (None, 300, 200)          2340600   
                                                                 
 gru (GRU)                   (None, 128)               126720    
                                                                 
 dropout_3 (Dropout)         (None, 128)               0         
                                                                 
 dense_6 (Dense)             (None, 100)               12900     
                                                                 
 dense_7 (Dense)             (None, 48)                4848      
                                                                 
=================================================================
Total params: 2,485,068
Trainable params: 2,485,068
Non-trainable params: 0
_________________________________________________________________
Out[ ]:
In [ ]:
es = EarlyStopping(monitor='val_accuracy', mode = 'auto', verbose = 1, patience = 5)  
mc = ModelCheckpoint('model-{epoch:03d}-{val_accuracy:03f}.h5', verbose=1, monitor='val_accuracy',save_best_only=True, mode='auto')
lr_reduction = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience=2, min_lr=0.0001)
In [ ]:
batch_size = 100
epochs = 10
model_GRU_history = model_GRU.fit(x_train,ytrain,batch_size=batch_size, epochs=epochs, 
                                callbacks=[es,mc,lr_reduction], validation_data = (x_test, ytest),verbose=True)
Epoch 1/10
250/252 [============================>.] - ETA: 0s - loss: 2.6005 - accuracy: 0.3342
Epoch 00001: val_accuracy improved from -inf to 0.63200, saving model to model-001-0.631997.h5
252/252 [==============================] - 8s 26ms/step - loss: 2.5929 - accuracy: 0.3361 - val_loss: 1.3265 - val_accuracy: 0.6320 - lr: 0.0010
Epoch 2/10
250/252 [============================>.] - ETA: 0s - loss: 0.8889 - accuracy: 0.7568
Epoch 00002: val_accuracy improved from 0.63200 to 0.85051, saving model to model-002-0.850509.h5
252/252 [==============================] - 6s 25ms/step - loss: 0.8866 - accuracy: 0.7575 - val_loss: 0.5188 - val_accuracy: 0.8505 - lr: 0.0010
Epoch 3/10
250/252 [============================>.] - ETA: 0s - loss: 0.4205 - accuracy: 0.8801
Epoch 00003: val_accuracy improved from 0.85051 to 0.90506, saving model to model-003-0.905057.h5
252/252 [==============================] - 6s 25ms/step - loss: 0.4213 - accuracy: 0.8800 - val_loss: 0.3396 - val_accuracy: 0.9051 - lr: 0.0010
Epoch 4/10
250/252 [============================>.] - ETA: 0s - loss: 0.2800 - accuracy: 0.9176
Epoch 00004: val_accuracy improved from 0.90506 to 0.92271, saving model to model-004-0.922710.h5
252/252 [==============================] - 6s 25ms/step - loss: 0.2792 - accuracy: 0.9179 - val_loss: 0.2745 - val_accuracy: 0.9227 - lr: 0.0010
Epoch 5/10
250/252 [============================>.] - ETA: 0s - loss: 0.2176 - accuracy: 0.9352
Epoch 00005: val_accuracy improved from 0.92271 to 0.93114, saving model to model-005-0.931139.h5
252/252 [==============================] - 6s 25ms/step - loss: 0.2180 - accuracy: 0.9351 - val_loss: 0.2424 - val_accuracy: 0.9311 - lr: 0.0010
Epoch 6/10
250/252 [============================>.] - ETA: 0s - loss: 0.1810 - accuracy: 0.9451
Epoch 00006: val_accuracy did not improve from 0.93114
252/252 [==============================] - 6s 24ms/step - loss: 0.1809 - accuracy: 0.9450 - val_loss: 0.2558 - val_accuracy: 0.9284 - lr: 0.0010
Epoch 7/10
250/252 [============================>.] - ETA: 0s - loss: 0.1647 - accuracy: 0.9484
Epoch 00007: val_accuracy improved from 0.93114 to 0.93543, saving model to model-007-0.935433.h5
252/252 [==============================] - 6s 25ms/step - loss: 0.1646 - accuracy: 0.9483 - val_loss: 0.2319 - val_accuracy: 0.9354 - lr: 0.0010
Epoch 8/10
252/252 [==============================] - ETA: 0s - loss: 0.1544 - accuracy: 0.9515
Epoch 00008: val_accuracy did not improve from 0.93543
252/252 [==============================] - 6s 24ms/step - loss: 0.1544 - accuracy: 0.9515 - val_loss: 0.2466 - val_accuracy: 0.9332 - lr: 0.0010
Epoch 9/10
250/252 [============================>.] - ETA: 0s - loss: 0.1449 - accuracy: 0.9529
Epoch 00009: val_accuracy improved from 0.93543 to 0.93989, saving model to model-009-0.939885.h5
252/252 [==============================] - 6s 25ms/step - loss: 0.1447 - accuracy: 0.9530 - val_loss: 0.2260 - val_accuracy: 0.9399 - lr: 0.0010
Epoch 10/10
250/252 [============================>.] - ETA: 0s - loss: 0.1383 - accuracy: 0.9545
Epoch 00010: val_accuracy did not improve from 0.93989
252/252 [==============================] - 6s 24ms/step - loss: 0.1377 - accuracy: 0.9547 - val_loss: 0.2384 - val_accuracy: 0.9383 - lr: 0.0010
In [ ]:
# Evaluate the model & report accuracy
from keras.models import load_model
model_GRU_saved = load_model("model-009-0.939885.h5")
scores = model_GRU_saved.evaluate(x_test, ytest, batch_size = 100, verbose = 1)
print('Test accuracy: %.2f%%' % (scores[1]*100))
63/63 [==============================] - 1s 9ms/step - loss: 0.2260 - accuracy: 0.9399
Test accuracy: 93.99%
In [ ]:
model_GRU_saved.get_config()
Out[ ]:
{'input_layers': [['input_4', 0, 0]],
 'layers': [{'class_name': 'InputLayer',
   'config': {'batch_input_shape': (None, 300),
    'dtype': 'int64',
    'name': 'input_4',
    'ragged': False,
    'sparse': False},
   'inbound_nodes': [],
   'name': 'input_4'},
  {'class_name': 'Embedding',
   'config': {'activity_regularizer': None,
    'batch_input_shape': (None, 300),
    'dtype': 'float32',
    'embeddings_constraint': None,
    'embeddings_initializer': {'class_name': 'RandomUniform',
     'config': {'maxval': 0.05, 'minval': -0.05, 'seed': None}},
    'embeddings_regularizer': None,
    'input_dim': 11703,
    'input_length': 300,
    'mask_zero': False,
    'name': 'embedding_3',
    'output_dim': 200,
    'trainable': True},
   'inbound_nodes': [[['input_4', 0, 0, {}]]],
   'name': 'embedding_3'},
  {'class_name': 'GRU',
   'config': {'activation': 'tanh',
    'activity_regularizer': None,
    'bias_constraint': None,
    'bias_initializer': {'class_name': 'Zeros',
     'config': {},
     'shared_object_id': 5},
    'bias_regularizer': None,
    'dropout': 0.0,
    'dtype': 'float32',
    'go_backwards': False,
    'implementation': 2,
    'kernel_constraint': None,
    'kernel_initializer': {'class_name': 'GlorotUniform',
     'config': {'seed': None},
     'shared_object_id': 3},
    'kernel_regularizer': None,
    'name': 'gru',
    'recurrent_activation': 'sigmoid',
    'recurrent_constraint': None,
    'recurrent_dropout': 0.0,
    'recurrent_initializer': {'class_name': 'Orthogonal',
     'config': {'gain': 1.0, 'seed': None},
     'shared_object_id': 4},
    'recurrent_regularizer': None,
    'reset_after': True,
    'return_sequences': False,
    'return_state': False,
    'stateful': False,
    'time_major': False,
    'trainable': True,
    'units': 128,
    'unroll': False,
    'use_bias': True},
   'inbound_nodes': [[['embedding_3', 0, 0, {}]]],
   'name': 'gru'},
  {'class_name': 'Dropout',
   'config': {'dtype': 'float32',
    'name': 'dropout_3',
    'noise_shape': None,
    'rate': 0.3,
    'seed': None,
    'trainable': True},
   'inbound_nodes': [[['gru', 0, 0, {}]]],
   'name': 'dropout_3'},
  {'class_name': 'Dense',
   'config': {'activation': 'relu',
    'activity_regularizer': None,
    'bias_constraint': None,
    'bias_initializer': {'class_name': 'Zeros', 'config': {}},
    'bias_regularizer': None,
    'dtype': 'float32',
    'kernel_constraint': None,
    'kernel_initializer': {'class_name': 'GlorotUniform',
     'config': {'seed': None}},
    'kernel_regularizer': None,
    'name': 'dense_6',
    'trainable': True,
    'units': 100,
    'use_bias': True},
   'inbound_nodes': [[['dropout_3', 0, 0, {}]]],
   'name': 'dense_6'},
  {'class_name': 'Dense',
   'config': {'activation': 'softmax',
    'activity_regularizer': None,
    'bias_constraint': None,
    'bias_initializer': {'class_name': 'Zeros', 'config': {}},
    'bias_regularizer': None,
    'dtype': 'float32',
    'kernel_constraint': None,
    'kernel_initializer': {'class_name': 'GlorotUniform',
     'config': {'seed': None}},
    'kernel_regularizer': None,
    'name': 'dense_7',
    'trainable': True,
    'units': 48,
    'use_bias': True},
   'inbound_nodes': [[['dense_6', 0, 0, {}]]],
   'name': 'dense_7'}],
 'name': 'model_3',
 'output_layers': [['dense_7', 0, 0]]}
In [ ]:
#visualizing model performance  - loss and accuracy 
f, (ax1, ax2) = plt.subplots(1, 2, figsize = (15, 7.2))
f.suptitle('Monitoring the performance of the GRU model')

ax1.plot(model_GRU_history.history['loss'], label = 'Train')
ax1.plot(model_GRU_history.history['val_loss'], label = 'Test')
ax1.set_title('Model Loss')
ax1.legend(['Train', 'Test'])

ax2.plot(model_GRU_history.history['accuracy'], label = 'Train')
ax2.plot(model_GRU_history.history['val_accuracy'], label = 'Test')
ax2.set_title('Model Accuracy')
ax2.legend(['Train', 'Test'])

plt.show()
In [ ]:
result_ML_final = result_ML_US[['accuracy_training','accuracy_test']]
In [ ]:
result_ML_final
Out[ ]:
accuracy_training accuracy_test
Model
Random Forest 95.9919 94.7837
Xgboost 89.6099 86.8162
SVC 94.9223 92.3346
KNN 91.1686 87.4046
Naive Bayes 72.655 69.2589
In [ ]:
result_ML_final.to_excel('result_ML_final.xlsx')
In [ ]:
accuracy_training_LSTM = (model_lstm_saved.evaluate(x_train,ytrain)[1])*100
accuracy_test_LSTM = (model_lstm_saved.evaluate(x_test, ytest)[1])*100

accuracy_training_BiLSTM = (model_bi_lstm_saved.evaluate(x_train,ytrain)[1])*100
accuracy_test_BiLSTM = (model_bi_lstm_saved.evaluate(x_test, ytest)[1])*100

accuracy_training_GRU= (model_GRU_saved.evaluate(x_train,ytrain)[1])*100
accuracy_test_GRU = (model_GRU_saved.evaluate(x_test, ytest)[1])*100
786/786 [==============================] - 6s 7ms/step - loss: 0.1342 - accuracy: 0.9551
197/197 [==============================] - 1s 7ms/step - loss: 0.2268 - accuracy: 0.9389
786/786 [==============================] - 10s 12ms/step - loss: 0.1350 - accuracy: 0.9559
197/197 [==============================] - 2s 12ms/step - loss: 0.2339 - accuracy: 0.9356
786/786 [==============================] - 5s 7ms/step - loss: 0.1258 - accuracy: 0.9585
197/197 [==============================] - 1s 7ms/step - loss: 0.2260 - accuracy: 0.9399
In [ ]:
Result_DL = {'Model' : ['LSTM', 'Bi-LSTM','GRU'],
           'Accuracy score training' :[accuracy_training_LSTM, accuracy_training_BiLSTM, accuracy_training_GRU],
         
         'Accuracy score Test': [accuracy_test_LSTM, accuracy_test_BiLSTM, accuracy_test_GRU]}


Result_DL= pd.DataFrame(Result_DL)
Result_DL
Out[ ]:
Model Accuracy score training Accuracy score Test
0 LSTM 95.510757 93.893129
1 Bi-LSTM 95.594257 93.559158
2 GRU 95.852721 93.988550

12.4 HyperTuning Deep Learning Models

In [ ]:
!pip install hyperas
In [ ]:
!pip install keras-tuner
In [ ]:
from sklearn.model_selection import RandomizedSearchCV

from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from keras.callbacks import ReduceLROnPlateau
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from keras import layers
import hyperas
from hyperas import optim
from hyperas.distributions import choice, uniform
from hyperopt import Trials, STATUS_OK, tpe
from tensorflow import keras
from kerastuner import BayesianOptimization
import keras_tuner as kt
import random

Hyper Parameter Tuning for LSTM

In [ ]:
def neural_network(num_neurons=100,act='relu',
                   dropout=0.3,num_class=num_class,maxlen=maxlen,num_words=num_words):
    
    input_layer = Input(shape=(maxlen,),dtype=tf.int64)
    embed = Embedding(num_words,output_dim=200,input_length=maxlen,weights=[embedding_matrix], trainable=True)(input_layer)
    lstm= LSTM(128)(embed)
    drop=Dropout(dropout)(lstm)
    dense =Dense(num_neurons,activation='relu')(drop)   
    out=Dense(num_class,activation='softmax')(dense)
    model_lstm= Model(input_layer,out)
    model_lstm.compile(loss='categorical_crossentropy',optimizer="adam",metrics=['accuracy'])
    return model_lstm
   
model_lstm = KerasClassifier(build_fn=neural_network,verbose=0)
batch_size = [100]
epochs = [10]
num_neurons = [50,100]
dropout = [0.3]
param_grid = dict(batch_size=batch_size,epochs=epochs,
                      num_neurons=num_neurons,
                      dropout=dropout
                     )
grid = GridSearchCV(estimator=model_lstm,param_grid =param_grid,cv=5,n_jobs=-1)
grid_result_lstm = grid.fit(x_train,ytrain)
grid_result_lstm.best_params_
Out[ ]:
{'batch_size': 100, 'dropout': 0.3, 'epochs': 10, 'num_neurons': 100}
In [ ]:
print('Best score LSTM GridSearchCV: ',grid_result_lstm.best_score_)
print('Best param LSTM GridSearchCV: ',grid_result_lstm.best_params_)
print('Execution time LSTM GridSearchCV: ',grid_result_lstm.refit_time_)
Random Best score LSTM GridSearchCV:  0.9213089227676392
Random Best param LSTM GridSearchCV:  {'batch_size': 100, 'dropout': 0.3, 'epochs': 10, 'num_neurons': 100}
Random execution time LSTM GridSearchCV:  62.25522017478943

Hyper Parameter Tuning for Bi-LSTM

In [ ]:
def neural_network(num_neurons=100,act='relu',
                   dropout=0.3,num_class=num_class,maxlen=maxlen,num_words=num_words):
    
    input_layer = Input(shape=(maxlen,),dtype=tf.int64)

    embed = Embedding(num_words,output_dim=200,input_length=maxlen,weights=[embedding_matrix], trainable=True)(input_layer)

    bi_lstm= Bidirectional(LSTM(128))(embed)

    drop=Dropout(dropout)(bi_lstm)

    dense =Dense(num_neurons,activation='relu')(drop)
    
    out=Dense(num_class,activation='softmax')(dense)

    model_bi_lstm= Model(input_layer,out)

    model_bi_lstm.compile(loss='categorical_crossentropy',optimizer="adam",metrics=['accuracy'])

    return model_bi_lstm
   
model_bi_lstm = KerasClassifier(build_fn=neural_network,verbose=0)
batch_size = [100]
epochs = [10]
num_neurons = [50,100]
dropout = [0.3]
param_grid = dict(batch_size=batch_size,epochs=epochs,
                      num_neurons=num_neurons,
                      dropout=dropout
                     )

grid = GridSearchCV(estimator=model_bi_lstm,param_grid =  param_grid,cv=5,n_jobs=-1)
#es = EarlyStopping(monitor='val_accuracy', mode = 'auto', verbose = 0, patience = 5)  
#mc = ModelCheckpoint('model-{epoch:03d}-{val_accuracy:03f}.h5', verbose=0, monitor='val_accuracy',save_best_only=True, mode='auto')
#lr_reduction = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience=2, min_lr=0.0001)
grid_result_bi_lstm = grid.fit(x_train,ytrain)

grid_result_bi_lstm.best_params_
Out[ ]:
{'batch_size': 100, 'dropout': 0.3, 'epochs': 10, 'num_neurons': 100}
In [ ]:
print('Best score Bi-LSTM GridSerach: ',grid_result_bi_lstm.best_score_)
print('Best params Bi-LSTM GridSerach: ',grid_result_bi_lstm.best_params_)
print('Execution time Bi-LSTM GridSerach: ',grid_result_bi_lstm.refit_time_)
Best score Bi-LSTM GridSerach:  0.9176104307174683
Best params Bi-LSTM GridSerach:  {'batch_size': 100, 'dropout': 0.3, 'epochs': 10, 'num_neurons': 100}
Execution time Bi-LSTM GridSerach:  126.1610255241394

Hyper Parameter Tuning for GRU

In [ ]:
def neural_network(num_neurons=100,act='relu',
                   dropout=0.3,num_class=num_class,maxlen=maxlen,num_words=num_words):
    
    input_layer = Input(shape=(maxlen,),dtype=tf.int64)

    embed = Embedding(num_words,output_dim=200,input_length=maxlen,weights=[embedding_matrix], trainable=True)(input_layer)

    gru= GRU(128)(embed)

    drop=Dropout(dropout)(gru)

    dense =Dense(num_neurons,activation='relu')(drop)
    
    out=Dense(num_class,activation='softmax')(dense)

    model_gru= Model(input_layer,out)

    model_gru.compile(loss='categorical_crossentropy',optimizer="adam",metrics=['accuracy'])

    return model_gru
   
model_gru = KerasClassifier(build_fn=neural_network,verbose=0)
batch_size = [100]
epochs = [10]
num_neurons = [50,100]
dropout = [0.3]
param_grid = dict(batch_size=batch_size,epochs=epochs,
                      num_neurons=num_neurons,
                      dropout=dropout
                     )
from sklearn.model_selection import RandomizedSearchCV
grid = GridSearchCV(estimator=model_gru,param_grid=param_grid,cv=5,n_jobs=-1)
#es = EarlyStopping(monitor='val_accuracy', mode = 'auto', verbose = 0, patience = 5)  
#mc = ModelCheckpoint('model-{epoch:03d}-{val_accuracy:03f}.h5', verbose=0, monitor='val_accuracy',save_best_only=True, mode='auto')
#lr_reduction = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience=2, min_lr=0.0001)
grid_result_gru = grid.fit(x_train,ytrain)

grid_result_gru.best_params_
Out[ ]:
{'batch_size': 100, 'dropout': 0.3, 'epochs': 10, 'num_neurons': 50}
In [ ]:
print('Best score GRU GridSearchCV: ',grid_result_gru.best_score_)
print('Best params GRU GridSearchCV: ',grid_result_gru.best_params_)
print('Execution time GRU GridSearchCV: ',grid_result_gru.refit_time_)
Best score GRU GridSearchCV:  0.9216267585754394
Best params GRU GridSearchCV:  {'batch_size': 100, 'dropout': 0.3, 'epochs': 10, 'num_neurons': 50}
Execution time GRU GridSearchCV:  54.90041136741638

13. RESULT COMPARISON

In [ ]:
result_ML_DL_final = pd.read_excel('/content/sample_data/result_ML_DL_final.xlsx')
result_ML_DL_final
Out[ ]:
Model accuracy_training accuracy_test
0 Random Forest 95.991888 94.783715
1 Xgboost 89.609925 86.816158
2 SVC 94.922263 92.334606
3 KNN 91.168635 87.404580
4 Naive Bayes 72.654976 69.258906
5 LSTM 95.510757 93.893129
6 Bi-LSTM 95.594257 93.559158
7 GRU 95.852721 93.988550
In [ ]:
print('Best score LSTM GridSearchCV: ',(grid_result_lstm.best_score_)*100)
print('Best score Bi-LSTM GridSerach: ',(grid_result_bi_lstm.best_score_)*100)
print('Best score GRU GridSearchCV: ',(grid_result_gru.best_score_)*100)
print("Best score Random Forest GridSearchCV: ", ( RF_CV_Fit.best_score_)*100)
Best score LSTM GridSearchCV:  92.13089227676392
Best score Bi-LSTM GridSerach:  91.76104307174683
Best score GRU GridSearchCV:  92.16267585754395
Best score Random Forest GridSearchCV:  94.03951633211271
  1. The GRU model gives a better accuracy among Deep Learning models through Grid Search CV at 92.16%.

  2. The Random Forest model gives highest accuracy for Machine Learning models and all models at 94.03%

  3. We have also seen Random Forest has higher accuracy, precision and recall in the first cut among all models.

In [ ]:
RF_CV_Fit.best_params_
Out[ ]:
{'clf__bootstrap': True,
 'clf__max_depth': None,
 'clf__max_features': 'auto',
 'clf__min_samples_leaf': 1,
 'clf__n_estimators': 100,
 'tfidf__use_idf': False,
 'vect__ngram_range': (1, 2)}
In [ ]:
# Create training and test datasets with 80:20 ratio without augmenatation
X_train, X_test, y_train, y_test = train_test_split(dataset_ML_upsampled.combined_description, 
                                                    dataset_ML_upsampled.target, 
                                                    test_size=0.20, 
                                                    random_state=42)
print('\033[1mShape of the training set:\033[0m', X_train.shape, y_train.shape)
print('\033[1mShape of the test set:\033[0m', X_test.shape, y_test.shape)
Shape of the training set: (25149,) (25149,)
Shape of the test set: (6288,) (6288,)
In [ ]:
rf_tuned  = RandomForestClassifier(bootstrap= True, max_depth=None, max_features='auto', 
                                          min_samples_leaf=1, n_estimators=100)


result = {}   # Create an empty dictionary to later use to store metrics of each of the models

for model, name  in zip([rf_tuned], 
                         ['Random Forest - tuned ']):
    result[name] = fit_n_print(model,X_train, X_test, y_train, y_test)
Algorithm: RandomForestClassifier

 Classification report:
               precision    recall  f1-score   support

           0       0.93      0.95      0.94       682
           1       0.87      1.00      0.93       105
           2       0.98      0.87      0.92       117
           3       1.00      1.00      1.00       126
           4       0.97      0.96      0.96       125
           5       0.99      0.95      0.97       117
           6       1.00      1.00      1.00       114
           7       1.00      1.00      1.00       113
           8       0.98      1.00      0.99       127
           9       0.99      1.00      1.00       106
          10       1.00      0.98      0.99       125
          11       0.97      0.87      0.92       131
          12       0.97      0.98      0.97       115
          13       1.00      1.00      1.00       115
          14       1.00      1.00      1.00       118
          15       1.00      1.00      1.00       125
          16       0.99      0.96      0.98       130
          17       0.98      0.99      0.98       130
          18       1.00      1.00      1.00       110
          19       0.99      1.00      1.00       129
          20       1.00      1.00      1.00       127
          21       0.97      0.98      0.97       127
          22       0.91      0.88      0.90       107
          23       0.99      1.00      1.00       102
          24       0.99      0.97      0.98       115
          25       1.00      0.99      1.00       101
          26       1.00      0.99      1.00       112
          27       0.98      1.00      0.99       115
          28       1.00      1.00      1.00       129
          29       0.99      1.00      1.00       124
          30       0.97      1.00      0.98       120
          31       1.00      1.00      1.00       123
          32       0.97      1.00      0.98       131
          33       0.99      1.00      1.00       121
          34       1.00      0.97      0.98       118
          35       1.00      0.81      0.90       113
          36       0.75      0.96      0.84       122
          37       1.00      1.00      1.00       120
          38       0.92      0.66      0.77       109
          39       0.95      1.00      0.98       123
          40       1.00      1.00      1.00       133
          41       0.93      0.56      0.70       124
          42       0.96      0.92      0.94       111
          43       1.00      1.00      1.00       124
          44       1.00      1.00      1.00       117
          45       0.97      0.48      0.65       126
          46       0.38      0.78      0.51       119
          47       1.00      0.98      0.99       115

    accuracy                           0.95      6288
   macro avg       0.96      0.95      0.95      6288
weighted avg       0.96      0.95      0.95      6288


 Confusion report:
 [[649   0   0 ...   0   0   0]
 [  0 105   0 ...   0   0   0]
 [  2   0 102 ...   0  11   0]
 ...
 [  4   4   2 ...  61  34   0]
 [  6   0   0 ...   0  93   0]
 [  2   0   0 ...   0   0 113]]
Accuracy Score: 0.9473600508905853


 

In [ ]:
result_RF_tuned= pd.DataFrame(np.array(list(result.values()))[:,:-1],    # make a dataframe out of the metrics from result dictionary 
                       columns= ['accuracy_training','accuracy_test',
                                 'recallscore_training', 'recallscore_test', 
                                 'precision_training','precision_test',
                                 'f1score_training', 'f1score_test', 
                                 'Elapsed'],
                      index= result.keys())   # use the model names as index

result_RF_tuned.index.name = 'Model'   # name the index of the result1 dataframe as 'Model'

result_RF_tuned
Out[ ]:
accuracy_training accuracy_test recallscore_training recallscore_test precision_training precision_test f1score_training f1score_test Elapsed
Model
Random Forest - tuned 95.9919 94.736 95.9919 94.736 97.5609 96.0445 96.2936 94.9003 27.0135

The objective of the capstone project, the goal is to build a classifier that can classify the tickets by analyzing text

The Random Forest gives not only better accuracy, but also, better recall, precision.

Hence, proceeding with Random Forest model.

LOADING RANDOM FOREST MODEL

In [ ]:
# Pickling and loading the model Random Forest Model
 #save the model to disk
filename = 'finalized_model.sav'
pickle.dump(rf_tuned, open(filename, 'wb'))
loaded_model = pickle.load(open(filename, 'rb'))
In [ ]:
%%shell
jupyter nbconvert --to html /PATH/TO/YOUR/GL_DecA_G4_NLP1_Final_With_MLandDL.ipynb